# Split a paragraph into lines and find
# the frequency and line number of words.
#
# Written to run unchanged on Python 2.6+ and Python 3.

from operator import itemgetter

# characters trimmed from both ends of every word
PUNCTUATION = ".,!?:;'"

# words and locations are stored in a dict:
#   WORD -> " n n ..."  (one space-prefixed line number per occurrence)
wordDict = {}

# the text we will parse
text = '''this is the text on line one.
this is line two text.
here is the text of line number three.'''


def groupWords(text):
    """Record in wordDict the line number of every word occurrence in text.

    The text is split on newlines (lines are numbered from 1) and each
    line is split on whitespace.  Words are upper-cased so that matching
    is case-insensitive, and PUNCTUATION characters are trimmed from both
    ends of each word.  A word that occurs twice on one line gets that
    line number recorded twice, so the number of recorded locations is
    the word's frequency.

    Args:
        text: the string to index.

    Returns:
        None.  The module-level wordDict is updated in place; calling the
        function again adds to the entries already present.
    """
    for lineCount, line in enumerate(text.split('\n'), 1):
        # words are words..case no matter
        for rawWord in line.upper().split():
            # strip punctuation per word, so "text," and "text" share a key
            word = rawWord.strip(PUNCTUATION)
            if not word:
                # the token was nothing but punctuation
                continue
            wordDict[word] = wordDict.get(word, "") + " " + str(lineCount)


groupWords(text)

# alphabetical output
for k in sorted(wordDict):
    print(k + wordDict[k])

# my output:
## HERE 3
## IS 1 2 3
## LINE 1 2 3
## NUMBER 3
## OF 3
## ON 1
## ONE 1
## TEXT 1 2 3
## THE 1 3
## THIS 1 2
## THREE 3
## TWO 2

# most frequent style output
# put the dict in a list of (count, word, locations) tuples
wordList = []
for k in wordDict:
    # count the recorded line numbers, not their digits: a single hit on
    # line 10 is one occurrence, not two
    wordList.append((str(len(wordDict[k].split())), str(k), wordDict[k]))

# compare the counts as numbers so that 10 ranks above 9
for word in sorted(wordList, key=lambda entry: int(entry[0]), reverse=True):
    print("%s %s :%s" % word)

# my output (the order of words with equal counts may differ):
## 3 TEXT : 1 2 3
## 3 IS : 1 2 3
## 3 LINE : 1 2 3
## 2 THIS : 1 2
## 2 THE : 1 3
## 1 ON : 1
## 1 TWO : 2
## 1 HERE : 3
## 1 ONE : 1
## 1 NUMBER : 3
## 1 OF : 3
## 1 THREE : 3
A Python example-based blog that shows how to accomplish Python goals and how to correct Python errors.
Wednesday, April 14, 2010
Python - frequency and location of words in text
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment