#!/usr/bin/env python
"""Parse Slashdot comment headers and relate comment score to posting delay.

The input is a plain-text dump of a Slashdot discussion.  Each comment
header in that dump is expected to look like this (line breaks may fall
anywhere inside the header)::

    Re:one could argue (Score:1)
    by [108]Dwonis (52652) [109]Alter Relationship
    <[110]dlitzNO@SPAMdlitz.net> on 10:44 PM January 14th, 2003

For every header the script extracts the score and the posting time,
computes how many hours after the story the comment was posted, and
prints the mean, minimum and maximum delay for each score.
"""

# NOTE: `os` and `string` are no longer used below; the imports (and the
# wildcard import from `time`, which supplies strptime/mktime) are retained
# from the original script so nothing that relied on them breaks.
import os
import re
import string
from time import *

# Discussion analysed:
# http://developers.slashdot.org/article.pl?sid=03/01/14/1514205
COMMENTS_FILE = "slash-comments.txt"
STORY_POSTED = "01:43 PM January 14th, 2003"

SECONDS_PER_HOUR = 3600.0

# Scores reported, best first.
REPORTED_SCORES = (5, 4, 3, 2, 1, 0, -1)

# Captures the score (up to the first "," or ")") and the text between
# " on " and the year 2003.  DOTALL lets a header span several lines.
HEADER_RE = re.compile(r'\(Score:(.*?)[,)].*? on (.*?2003)', re.DOTALL)

# English ordinal suffix on a day number: "14th" -> "14", "21st" -> "21".
ORDINAL_RE = re.compile(r'(\d+)(?:st|nd|rd|th)\b')

# %I (12-hour clock) is required for %p to take effect when parsing.
# The full month name is tried first, then the abbreviated form.
TIMESTAMP_FORMATS = ("%I:%M %p %B %d, %Y", "%I:%M %p %b %d, %Y")


def parse_timestamp(text):
    """Convert a Slashdot timestamp into seconds since the epoch.

    *text* looks like ``"10:44 PM January 14th, 2003"``.  Runs of
    whitespace (including newlines) are collapsed and the ordinal suffix
    on the day is removed before parsing.  The time is interpreted in the
    local timezone, as ``mktime`` does.

    Raises ValueError if *text* matches none of the known formats.
    """
    normalized = ORDINAL_RE.sub(r"\1", " ".join(text.split()))
    for fmt in TIMESTAMP_FORMATS:
        try:
            return mktime(strptime(normalized, fmt))
        except ValueError:
            continue
    raise ValueError("unrecognised timestamp: %r" % (text,))


def collect_elapsed_by_score(content, story_posted=STORY_POSTED):
    """Group comment delays, in hours, by comment score.

    *content* is the text of the discussion dump and *story_posted* the
    timestamp of the story itself.  Returns a dict mapping each integer
    score found to the list of delays (hours after the story, as floats)
    of the comments carrying that score, in order of appearance.
    """
    posted_at = parse_timestamp(story_posted)
    results = {}
    for score, date in HEADER_RE.findall(content):
        hours = (parse_timestamp(date) - posted_at) / SECONDS_PER_HOUR
        results.setdefault(int(score), []).append(hours)
    return results


def load_comments(path=COMMENTS_FILE):
    """Return the text of the discussion dump stored at *path*.

    The file is read as bytes and decoded as Latin-1, which accepts any
    byte sequence; the header pattern only inspects ASCII text.
    """
    with open(path, "rb") as handle:
        raw = handle.read()
    return raw.decode("latin-1")


def print_report(results):
    """Print mean, minimum and maximum delay for each reported score.

    *results* is the mapping built by collect_elapsed_by_score().  Scores
    with no comments are left out of the table.
    """
    # Not a standard-library module; imported late, as in the original
    # script, so that parsing does not depend on it.
    import stats

    print("Score with Mean, Minimum, and Maximum Elapsed Time in Hours")
    print("score mean minimum maximum")
    for key in REPORTED_SCORES:
        delays = results.get(key)
        if not delays:
            continue
        print("%+2d %2.2f %2.2f %2.2f"
              % (key, stats.mean(delays), min(delays), max(delays)))


def main():
    """Read the dump, compute the delays and print the report."""
    print_report(collect_elapsed_by_score(load_comments()))


if __name__ == "__main__":
    main()