Showing posts with label sort. Show all posts
Showing posts with label sort. Show all posts

Wednesday, April 14, 2010

Python - frequency and location of words in text

# Split a paragraph into lines and find 
# the frequency and line number of words. 
 
from operator import itemgetter
 
# words and locations are stored in a dict:
#   key   -> upper-cased word
#   value -> string of line numbers, each preceded by a space (e.g. " 1 2 3")
wordDict = {}

# the text we will parse
# (no trailing whitespace after the closing punctuation of each line, so
# stripping punctuation from the ends of a line actually reaches it)
text = ''' this is the text on line one.
this is line two text.
here is the text of line number three.'''
 
 
def groupWords(text, index=None):
    """Record, for every word in text, the line numbers it appears on.

    Words are case-insensitive (stored upper-cased) and have leading and
    trailing punctuation removed.  Every occurrence appends its 1-based
    line number to the word's entry, so a word that appears twice on one
    line records that line number twice.

    text  -- the string to parse; lines are separated by newline characters
    index -- dict to update; defaults to the module-level wordDict

    Returns the updated dict, mapping WORD -> " n1 n2 ..." (a string of
    line numbers, each preceded by a single space).
    """
    if index is None:
        index = wordDict
    # break down by lines, numbering them from 1
    for lineNumber, line in enumerate(text.split('\n'), 1):
        # words are words: case does not matter
        for token in line.upper().split():
            # strip punctuation per word, not per line, so "one," in the
            # middle of a line and "one. " before trailing whitespace are
            # both recorded as ONE
            word = token.strip(".,!?:;'")
            if not word:
                # the token was nothing but punctuation
                continue
            # append this line number to whatever is already recorded
            index[word] = index.get(word, "") + " " + str(lineNumber)
    return index
 
groupWords(text)

# alphabetical output
# iterating a dict yields its keys, so sorted(wordDict) is the sorted words
for k in sorted(wordDict):
    # a parenthesized single-argument print works on Python 2 and Python 3
    print(k + str(wordDict[k]))
 
# my output: 
##  HERE 3 
##  IS 1 2 3 
##  LINE 1 2 3 
##  NUMBER 3 
##  OF 3 
##  ON 1 
##  ONE 1 
##  TEXT 1 2 3 
##  THE 1 3 
##  THIS 1 2 
##  THREE 3 
##  TWO 2 
 
# most frequent style output
# put the dict in a list of (count, word, locations) tuples
# count is the number of recorded line numbers; split() counts whole
# numbers, so a multi-digit line number such as 12 is still one occurrence
wordList = [(str(len(wordDict[k].split())), str(k), wordDict[k])
            for k in wordDict]


# sort on the numeric value of the count: comparing the count strings
# directly would rank "9" above "10"
for word in sorted(wordList, key=lambda entry: int(entry[0]), reverse=True):
    # a single formatted argument keeps print portable across Python 2 and 3
    print("%s %s :%s" % (word[0], word[1], word[2]))
 
# my output: 
##  3 TEXT : 1 2 3 
##  3 IS : 1 2 3 
##  3 LINE : 1 2 3 
##  2 THIS : 1 2 
##  2 THE : 1 3 
##  1 ON : 1 
##  1 TWO : 2 
##  1 HERE : 3 
##  1 ONE : 1 
##  1 NUMBER : 3 
##  1 OF : 3 
##  1 THREE : 3 
 
 

Tuesday, September 15, 2009

Python - custom sort a list

# sort a list on your own criteria

# define your own comparison function for sorting
# (must return a positive number, zero, or a negative number)

def mysort(x, y):
    """Compare two sized objects by length.

    Returns 1 if x is longer than y, 0 if both have the same length,
    and -1 if x is shorter than y.
    """
    lenX, lenY = len(x), len(y)
    # (a > b) - (a < b) evaluates to 1, 0 or -1
    return (lenX > lenY) - (lenX < lenY)


# cmp_to_key adapts an old-style comparison function into a key function;
# passing the comparison function positionally to sort() only works on
# Python 2
from functools import cmp_to_key

alist = ['Here', 'is', 'a', 'list',
        'of', 'small', 'and', 'gybungusly',
        'big', 'words']

alist.sort(key=cmp_to_key(mysort))


# the list is now sorted from shortest to longest word
# (the sort is stable, so words of equal length keep their original order)
print(alist)

#output:
# ['a', 'is', 'of', 'and',

#   'big', 'Here', 'list',
#   'small', 'words', 'gybungusly']

# if you only need to sort alphabetically,
# you can call the plain sort() method
# with no arguments
#

Python - reorder a sentence alphabetically

# reorder the words in a sentence alphabetically


def sentenceAlphabetizer(sentence):
    """Return the words of sentence rearranged in sorted order.

    The sentence is split on single spaces, the pieces are ordered by
    plain string comparison, and they are joined back with single spaces.
    Leading and trailing whitespace is stripped from the result.
    """
    ordered = sorted(sentence.split(' '))
    return ' '.join(ordered).strip()



if __name__ == '__main__':
    # a parenthesized single-argument print works on Python 2 and Python 3
    print(sentenceAlphabetizer("basic applepie zoo party"))

#output:
#   applepie basic party zoo