# Python scripts are often run from the command line.
# The sys module gives a script access to its command-line arguments.
import sys

# Every argument is stored in the sys.argv list.  sys.argv[0] is the script
# name itself, so the count is one more than the number of user arguments.
# print() is called with a single pre-formatted string so the script runs
# unchanged on both Python 2 and Python 3.
print("number of arguments passed: %d" % len(sys.argv))

# Process the argument list, one argument per line.
for argument in sys.argv:
    print(argument)

# my input/output:
# python commandlinearguments.py one two three four five six seven
# number of arguments passed: 8
# commandlinearguments.py
# one
# two
# three
# four
# five
# six
# seven
A python example based blog that shows how to accomplish python goals and how to correct python errors.
Thursday, November 19, 2009
Python - processing command line arguments
Wednesday, November 18, 2009
Python - boolean and, or, and not
# Python includes the basic boolean operations: and, or, and not.
a = 10
b = 20
c = 30

# 'and' is true only when every operand is true.
if a > 9 and b + 1 == 21 and c == a + b:
    print("boolean 'and' example equates true")

# 'or' is true as soon as any one operand is true (here only c < 100 is).
if a == 9 or b < 10 or c < 100:
    print("only one of these expressions needs to be true")

# 'not' inverts the comparison: a + b is 30, so "a + b == 31" is false
# and the negation is true.
if not a + b == 31:
    print("if this is not true")
Tuesday, October 27, 2009
Python - break a large mysql dump into small dumps
# I created a dump file from my MySQL db and found that it was too large
# to upload to my new db host.  This script breaks the dump up into smaller
# pieces (0.sql, 1.sql, ...) that can be imported more easily through
# phpMyAdmin.  (If I'd only had shell access I wouldn't have had this
# problem at all!)

# This marker line indicates that a new table is about to be created.
dlmtr = "-- Table structure for table"

fileN = 0
with open("myDBDump.sql") as wholeFile:
    # 0.sql receives everything that precedes the first table (the dump
    # header); each later file starts at a table marker.
    oFile = open(str(fileN) + ".sql", 'w')
    try:
        for line in wholeFile:
            if dlmtr in line:
                # This is the seam for the next file.
                print("starting new file")
                oFile.close()
                fileN += 1
                oFile = open(str(fileN) + ".sql", 'w')
            oFile.write(line)
    finally:
        # Close the current piece even if reading or writing fails.
        oFile.close()
print("Done")
Python - extract a tar.gz archive
# Extract a tar.gz file with the tarfile module.
import contextlib
import tarfile

# Open the archive with the 'r:gz' mode, which enables reading with gzip
# compression.  contextlib.closing guarantees the archive is closed again
# (TarFile is only a context manager itself from Python 2.7 onwards).
with contextlib.closing(tarfile.open("archive.tar.gz", 'r:gz')) as tfile:
    # 99.9% of the time you just want to extract all the contents.
    # NOTE(review): extractall() trusts the member paths stored in the
    # archive; only use it on archives from a source you trust.
    tfile.extractall('.')

# Maybe this isn't so amazing for those of you using *nix, OS X, or
# anything else that ships with tar and gunzip.  However, if you find
# yourself on Windows and need to extract a tar.gz you're in for quite the
# freeware/spyware/spamware gauntlet.
# Python has everything you need built in!  Hooray for Python!
print("Done!")
Monday, October 26, 2009
Python - extract or unzip a tar file
# Untar a tar file with Python.
# Python can open, inspect the contents of, and extract tar files with the
# built-in tarfile module.
import contextlib
import tarfile

# tar file to extract
theTarFile = 'example.tar'

# path to extract into
extractTarPath = '.'

# Check the file BEFORE opening it: tarfile.open() raises ReadError on a
# file that is not a tar archive, so testing afterwards could never reach
# the "is not a tarfile" branch.
if tarfile.is_tarfile(theTarFile):
    with contextlib.closing(tarfile.open(theTarFile)) as tfile:
        # List all contents.  list() prints the listing itself and returns
        # None, so it must not be wrapped in print.
        print("tar file contents:")
        tfile.list(verbose=False)

        # Extract all contents.
        # NOTE(review): extractall() trusts the member paths stored in the
        # archive; only use it on archives from a source you trust.
        tfile.extractall(extractTarPath)
else:
    print(theTarFile + " is not a tarfile.")
Labels:
extractall,
is_tarfile,
python,
tarfile
Tuesday, October 20, 2009
Python - reduce a web site's size
# I've recently started using iWeb (which, for the non Mac OS X inclined,
# is the application used to make web sites and what not).
#
# After creating an 80 page site I was horrified at the total size of the
# site: nearly 100 MB!  The site really wasn't very graphics intensive;
# each page had only one image on it at most.  Examining the file structure
# showed that iWeb produces web sites that use huge image files, not even
# remotely compressed.
#
# I needed to rescale the jpg and png files down to reasonable sizes.
from PIL import Image
import glob
import os

# This is the default bounding box for images.
size = 256, 256

# Two different sizes are provided for other files that need to be larger.
# Identify (by file name) the files that need to be higher quality.
csize0files = 'PhotoGray_nav_bg.png', 'bg_round.jpg'
csize0 = 768, 768

# A different custom size for other 'important' files.  The names below
# are placeholders; they must stay defined because the loop consults them.
csize1files = 'nonefiles', 'none'
csize1 = 512, 512

# Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter
# under its current name.  Fall back to the old name on old PIL releases.
if hasattr(Image, "LANCZOS"):
    RESAMPLE = Image.LANCZOS
else:
    RESAMPLE = Image.ANTIALIAS

# Create one list of all the files, added type by type.
# For my page I just had jpg and png images.
all_matching_files = glob.glob("*/*.jpg") + glob.glob("*/*.png")

# If you are using this for iWeb check out the file count!
# My 80 page site had 3000+ images!!!
print("total images to be resized: " + str(len(all_matching_files)))

count = len(all_matching_files)

# Loop through all the images and make the changes.
for infile in all_matching_files:
    scalesize = size
    im = Image.open(infile)

    # Split out all the useful parts of the file's path.
    thePath, theFile = os.path.split(infile)
    fileName, extension = os.path.splitext(theFile)

    # Custom resize if necessary.
    if theFile in csize1files:
        scalesize = csize1
    elif theFile in csize0files:
        scalesize = csize0

    # Resize in place with PIL's thumbnail method, which keeps the aspect
    # ratio and never enlarges the image.
    im.thumbnail(scalesize, RESAMPLE)

    # Save back over the original as the appropriate type.  The extension
    # is compared case-insensitively because glob may match "X.PNG" on
    # case-insensitive file systems.
    if extension.lower() == ".png":
        im.save(infile, "PNG")
    else:
        im.save(infile, "JPEG")

    count -= 1
    if count % 10 == 0:
        # Output some useful stats.
        print(str(count) + " images remaining.")

print("....done")

## output:
## total images to be resized: 3907
## 3900 images remaining.
## 3890 images remaining.
## ... [snip].....(there were a lot!)
## 30 images remaining.
## 20 images remaining.
## 10 images remaining.
## 0 images remaining.
## ....done

# Running this script reduced my website size from 96MB to 39MB!!
#
# Certainly there is still room for improvement; future posts will ideally
# be aimed at further efficiency gains.
Monday, October 19, 2009
Python - quickly update urls in a web page
# Update a web page's url references.
# Your web page has a collection of images and you want to update all the
# "folder1" references to the new folder you've populated, "newfolder".

# Normally you would open the actual html file and iterate through it one
# line at a time, but for simplicity's sake this example just uses a
# multi-line string.
theString = """
<img src="folder1/pic2324.jpg" />
<img src="folder1/pic2255.png" />
<img src="folder2/pic552.jpg" />
<img src="folder1/pica2f.jpg" />
"""

# All you need is to iterate through the lines and replace 'folder1/' with
# 'newfolder/'.  Including the trailing slash keeps "folder2/" (and any
# name that merely contains "folder1") untouched.
for line in theString.split('\n'):
    line = line.replace("folder1/", "newfolder/")
    print(line)

# output (the string's leading and trailing blank lines are omitted here):
# <img src="newfolder/pic2324.jpg" />
# <img src="newfolder/pic2255.png" />
# <img src="folder2/pic552.jpg" />
# <img src="newfolder/pica2f.jpg" />
Tuesday, October 6, 2009
Python - read robots.txt files with ease
# The robot parser gives access to a website's robots.txt file.
# In Python 2 it is the top-level robotparser module; in Python 3 it lives
# in the urllib package as urllib.robotparser.
try:
    import robotparser  # Python 2
except ImportError:
    from urllib import robotparser  # Python 3

# the site I want to read
url = "http://pythonicprose.blogspot.com/robots.txt"

rob = robotparser.RobotFileParser()
rob.set_url(url)

# Read and parse the file (this performs a network request).
rob.read()

# If you are creating a web crawler or spider you may need to keep track of
# how long it has been since you last read the robots.txt file.
# Use modified() to mark the time and mtime() to read it back.
rob.modified()
# to get the time:
rob.mtime()

# Check whether any user agent can read the home page.
print(rob.can_fetch("*", "/"))
# output:
# True

# Check whether any user agent can read the search page.
print(rob.can_fetch("*", "/search"))
# output:
# False

# Now that we are so many lines down from set_url we can check which host
# we are processing.
print(rob.host)
# output:
# 'pythonicprose.blogspot.com'
Sunday, October 4, 2009
Python - make your own class attributes iterable
# It can be useful to iterate through data contained in your own custom
# objects.

# Let's say you have your own class.
class ExampleClass(object):
    """Small container holding a list and a dict of example data.

    Instances are directly iterable: ``for item in instance`` walks the
    same items, in the same order, as ``instance.iterateList()``.
    """

    def __init__(self):
        self.objectList = []
        self.objectDict = {}
        self.maxItem = 100
        self.objectItem = ""

    def __iter__(self):
        """Iterate over the items stored with addListItem()."""
        return iter(self.objectList)

    def iterateList(self):
        """Return the underlying list of items (not a copy)."""
        return self.objectList

    def addListItem(self, item):
        """Append item to the list."""
        self.objectList.append(item)

    def addDictItem(self, item, value):
        """Store value in the dict under the key item."""
        self.objectDict[item] = value


# Create an instance of the class and use its iterating methods.
ec = ExampleClass()

# Add some example data.
for i in range(10):
    ec.addListItem(i)
    ec.addDictItem(i, str(i) + "'s value")

# Now that we have data, iterate through it.
for item in ec.iterateList():
    print(item)

#output:
# 0
# 1
# 2
# 3
# 4
# 5
# 6
# 7
# 8
# 9
Saturday, October 3, 2009
Python - using the sqlite3 module for persistent data
# The sqlite3 module lets you create and use a database with just a file.
import sqlite3
import os

# Choose the file to use for the db: "tempdb" inside the current working
# directory.  os.path.join supplies the path separator; plain string
# concatenation would produce "<cwd>tempdb", a file next to (not inside)
# the working directory.
db_path = os.path.join(os.path.abspath('.'), "tempdb")

# Connect (this creates the file if it does not exist yet).
conn = sqlite3.connect(db_path)
try:
    # Grab a cursor so we can create the db schema.
    c = conn.cursor()

    # If you run through this example a few times you may notice that the
    # data is persistent.  For this example we make sure we start from
    # ground zero by dropping the table (if it exists).
    c.execute('drop table if exists users')

    # create a table
    c.execute('create table users (name text, age text, email text)')

    # Insert data.  The "?" placeholders let sqlite3 do the quoting.
    c.executemany('insert into users values (?, ?, ?)', [
        ('steve', '30', 'blah@blah.com'),
        ('steve2', '32', 'blah@blah2.com'),
        ('steve3', '33', 'blah@blah3.com'),
    ])

    # Without a commit the inserted rows are rolled back when the
    # connection closes, and nothing would persist.
    conn.commit()

    # Now select our data.
    c.execute('select * from users')

    # Iterate through the results with a for loop.
    for row in c:
        print(row)
finally:
    conn.close()

# output (Python 2 shows the u'' prefix on unicode strings):
# (u'steve', u'30', u'blah@blah.com')
# (u'steve2', u'32', u'blah@blah2.com')
# (u'steve3', u'33', u'blah@blah3.com')
Python - create unit tests and ensure accurate documentation with doctest
""" The doctest module uses class and method documentation to run unit tests on your code. The doctest module reads the coding documentation you've created and uses that same documentation to conduct unit tests. This helps ensure the documentation is accurate and creates a one stop destination for documentation and unit tests. """ class ExampleClass(object): """ Example class that has one working method. >>> ec = ExampleClass() >>> ec.example(10) 19 >>> ec = ExampleClass() >>> ec.example(0) -1 # non int parameters should # return nothing >>> ec = ExampleClass() >>> ec.example("apple") >>> ec = ExampleClass() >>> ec.a = 3 >>> ec.example(10) 30 """ def __init__(self): self.a = 2 self.b = -1 def example(self, n): try: n = int(n) except ValueError: return None return n * self.a + self.b if __name__ == '__main__': # more about doctest features import doctest doctest.testmod() # this outputs: # nothing at all # if no errors are found then doctest doesn't complain # If you were to change some of the expected values in the # documention....for instance... the last example: # >>> ec = ExampleClass() # >>> ec.a = 3 # >>> ec.example(10) # 29 # and change the expected response to 30 # # # the output would be: # ********************************************************************** # File "C:\python\docstringexample.py", line 27, in __main__.ExampleClass # Failed example: # ec.example(10) # Expected: # 30 # Got: # 29 # ********************************************************************** # 1 items had failures: # 1 of 9 in __main__.ExampleClass # ***Test Failed*** 1 failures.
Labels:
class,
doctest,
documentation,
python,
unit test
Python - storing persistent objects in a file with shelve
# The shelve module is used to store objects in a file.
# You use the file like a glorified dict with key, value pairs.
import contextlib
import shelve

objList = []
filename = 'shelveFile.shelve'


# Here is an example class we'll create instances of and then store in
# the file.
class ExampleClass(object):
    def __init__(self):
        self.a = 0
        self.b = 1
        self.c = 2
        self.k = 0

    def getTotal(self):
        """Return the sum of the a, b and c attributes."""
        return self.a + self.b + self.c


# Create several instances; k doubles as each object's key in the shelf.
for i in range(3):
    obj = ExampleClass()
    obj.k = i
    obj.a = i + 1
    obj.b = i + 2
    obj.c = i + 3
    objList.append(obj)

# Open (and/or create) the file.  The shelf is deliberately not named
# "file", which would shadow the Python 2 builtin.  contextlib.closing
# guarantees close() runs even if storing fails.
with contextlib.closing(shelve.open(filename)) as shelf:
    # Now add the objects to the shelf.
    for obj in objList:
        # keys are strings
        shelf[str(obj.k)] = obj

    # The sync command explicitly writes changes to the file.  Closing the
    # shelf also executes sync, so this call is shown only for
    # illustration.
    shelf.sync()

# The file (and the 3 objects in it) are now saved.

# Now we'll reopen it and verify the data is there.
with contextlib.closing(shelve.open(filename)) as shelf2:
    # Iterate through and print out the object attributes (to verify they
    # are the values we assigned previously).
    for key in shelf2.keys():
        j = shelf2[key]
        print("a,b,c,k = %s %s %s %s" % (j.a, j.b, j.c, j.k))

    #output (key order is not guaranteed):
    #a,b,c,k = 1 2 3 0
    #a,b,c,k = 3 4 5 2
    #a,b,c,k = 2 3 4 1

    # You can edit these values.
    # Here we change all 'a' attributes to 7.
    # list() snapshots the keys so the shelf is not written to while its
    # live key view is being iterated.
    for key in list(shelf2.keys()):
        # Take note of how these changes are made.  You cannot merely
        # alter an attribute like shelf2[key].a = 7 (this will not work):
        # each lookup returns a fresh copy, so the modified object has to
        # be stored back under its key.
        j = shelf2[key]
        j.a = 7
        shelf2[str(j.k)] = j

    # And verify that the changes were made:
    for key in shelf2.keys():
        j = shelf2[key]
        print("a,b,c,k = %s %s %s %s" % (j.a, j.b, j.c, j.k))

    #output:
    #a,b,c,k = 7 2 3 0
    #a,b,c,k = 7 4 5 2
    #a,b,c,k = 7 3 4 1

# Leaving the with-block closes the shelve file so you can use the data
# objects another day.
Thursday, October 1, 2009
Python - using filecmp to compare two or more files
# The filecmp module is a portable way to check whether two (or more)
# files are the same.
# Its two main functions are:
#   cmp(file1, file2)
#   cmpfiles(directory1, directory2, common)
import filecmp

# Check whether two files are the same.
# Of course you need to have an /etc/hosts and an /etc/hosts.bak for this
# example to work (feel free to change to file names that do exist on your
# setup); a missing file raises an error.
if filecmp.cmp('/etc/hosts', '/etc/hosts.bak'):
    print("the files are the same")
else:
    print("the files are not the same")

# filecmp also allows you to compare files across two directories.
# cmpfiles takes the two directories plus the list of file names to check
# in both, and returns three lists of names: matches, mismatches, errors.
match, mismatch, error = filecmp.cmpfiles(
    'folder1', 'folder2', ['LICENSE.TXT', 'README.TXT', 'VERSION'])
print("Matching: %s" % (match,))
print("Mismatched: %s" % (mismatch,))
print("Errors: %s" % (error,))

# output (for me):
# Matching: ['LICENSE.TXT', 'README.TXT']
# Mismatched: ['VERSION']
# Errors: []
Python - copy or move files and directories
# shutil is used for high level copy and move needs.
# shutil can operate on individual files or recursively on a directory
# structure.
import shutil

# These examples assume the file is in the current working directory.

# shutil supports simply copying files.
print("just copy file contents...")
shutil.copy("hero.bmp", "hero2.bmp")

# The second file, hero2.bmp, is now created.
# However, attributes of the file such as its timestamps have been reset
# (depending on the file type).

# To copy the stats from the first file to the new copied file you can use
# the copystat method.  This fixes the first example's lack of stat
# copying.
print("copy stats...")
shutil.copystat("hero.bmp", "hero2.bmp")

# shutil can also copy an entire directory tree with copytree.
# Stats are copied for the files.  The destination must not exist yet.
print("recursively copy directory tree...")
shutil.copytree('C:/tmp', 'C:/newtmp')

# shutil can also remove a directory tree.
print("remove the copied directory tree...")
shutil.rmtree('C:/newtmp')
Python - using glob to get lists of files and directories
# glob is a simple and useful python module.
# It uses shell-style wildcards (not full regular expressions) to match
# directories and files for a given path.  If you've ever used the command
# line to 'ls' or 'dir' the current directory you may be aware that it
# accepts * or ? or [] to match patterns.  glob is a python implementation
# of this functionality.
import glob
import os

# Find all the .txt files in the current working directory.
# (Matching is case-insensitive only on case-insensitive file systems
# such as Windows.)
print(glob.glob('*.TXT'))
# output:
# ['LICENSE.txt', 'NEWS.txt', 'README.txt']

# You can also specify a full path.
# Here I'm searching for dll files in python 2.6.  Raw strings keep the
# Windows backslashes from being read as escape sequences.
print(glob.glob(r'C:\Python26\DLLs\*.dll'))
# output:
# ['C:\\Python26\\DLLs\\sqlite3.dll',
#  'C:\\Python26\\DLLs\\tcl85.dll',
#  'C:\\Python26\\DLLs\\tclpip85.dll',
#  'C:\\Python26\\DLLs\\tk85.dll']

# If you are expecting a great deal of results you should use the
# glob.iglob method, which returns matches as it goes and does not load
# everything into memory first.

# glob.iglob() example: iglob returns an iterator, so a plain for loop
# consumes it and stops by itself when the matches run out.
for fileNameAndPath in glob.iglob(r'C:\Python26\Lib\*'):
    # Since glob gives you the full path you can use the output with some
    # of the os module's functions.
    if os.path.isfile(fileNameAndPath):
        print(fileNameAndPath + " is a file.")
    else:
        print(fileNameAndPath + " is not a file.")

#output (snipped a bit...since there are a lot):
# C:\Python26\Lib\abc.py is a file.
# ....[snip]
# C:\Python26\Lib\compiler is not a file.
# ...[another snip]
# C:\Python26\Lib\getopt.py is a file.
# C:\Python26\Lib\getopt.pyc is a file.
# C:\Python26\Lib\getpass.py is a file.
# C:\Python26\Lib\gettext.py is a file.
# C:\Python26\Lib\glob.py is a file.
# C:\Python26\Lib\glob.pyc is a file.
Python - printing complex objects with pretty printing
# Pretty printing (using the pprint module) transforms python objects into
# human readable output.
#
# Use pprint when you need to display a complex data structure to users.
import string
import pprint

# Build a dict mapping each lowercase letter to the alphabet with that
# letter blanked out.
d = {}
for i in string.ascii_lowercase:
    d[i] = string.ascii_lowercase.replace(i, ' ')

print("not useful output:")
print(d)

# output (one long wrapped line; key order depends on the Python version):
# not useful output:
# {'a': ' bcdefghijklmnopqrstuvwxyz', 'c': 'ab defghijklmnopqrstuvwxyz', 'b':
# 'a cdefghijklmnopqrstuvwxyz', 'e': 'abcd fghijklmnopqrstuvwxyz', 'd': 'abc
# efghijklmnopqrstuvwxyz', 'g': 'abcdef hijklmnopqrstuvwxyz', 'f': 'abcde ghij
# klmnopqrstuvwxyz', 'i': 'abcdefgh jklmnopqrstuvwxyz', 'h': 'abcdefg ijklmnop
# qrstuvwxyz', 'k': 'abcdefghij lmnopqrstuvwxyz', 'j': 'abcdefghi klmnopqrstuv
# wxyz', 'm': 'abcdefghijkl nopqrstuvwxyz', 'l': 'abcdefghijk mnopqrstuvwxyz',
# 'o': 'abcdefghijklmn pqrstuvwxyz', 'n': 'abcdefghijklm opqrstuvwxyz', 'q': '
# abcdefghijklmnop rstuvwxyz', 'p': 'abcdefghijklmno qrstuvwxyz', 's': 'abcdef
# ghijklmnopqr tuvwxyz', 'r': 'abcdefghijklmnopq stuvwxyz', 'u': 'abcdefghijkl
# mnopqrst vwxyz', 't': 'abcdefghijklmnopqrs uvwxyz', 'w': 'abcdefghijklmnopqr
# stuv xyz', 'v': 'abcdefghijklmnopqrstu wxyz', 'y': 'abcdefghijklmnopqrstuvwx
# z', 'x': 'abcdefghijklmnopqrstuvw yz', 'z': 'abcdefghijklmnopqrstuvwxy '}
#
# All the data is there but it is difficult to read.
# You can use pprint (pretty print) to make things easy to read.  pprint
# formats python data structures to be human readable.

print("human readable output:")
pprint.pprint(d, indent=4)

# output:
#human readable output:
#{   'a': ' bcdefghijklmnopqrstuvwxyz',
#    'b': 'a cdefghijklmnopqrstuvwxyz',
#    'c': 'ab defghijklmnopqrstuvwxyz',
#    'd': 'abc efghijklmnopqrstuvwxyz',
#    'e': 'abcd fghijklmnopqrstuvwxyz',
#    'f': 'abcde ghijklmnopqrstuvwxyz',
#    'g': 'abcdef hijklmnopqrstuvwxyz',
#    'h': 'abcdefg ijklmnopqrstuvwxyz',
#    'i': 'abcdefgh jklmnopqrstuvwxyz',
#    'j': 'abcdefghi klmnopqrstuvwxyz',
#    'k': 'abcdefghij lmnopqrstuvwxyz',
#    'l': 'abcdefghijk mnopqrstuvwxyz',
#    'm': 'abcdefghijkl nopqrstuvwxyz',
#    'n': 'abcdefghijklm opqrstuvwxyz',
#    'o': 'abcdefghijklmn pqrstuvwxyz',
#    'p': 'abcdefghijklmno qrstuvwxyz',
#    'q': 'abcdefghijklmnop rstuvwxyz',
#    'r': 'abcdefghijklmnopq stuvwxyz',
#    's': 'abcdefghijklmnopqr tuvwxyz',
#    't': 'abcdefghijklmnopqrs uvwxyz',
#    'u': 'abcdefghijklmnopqrst vwxyz',
#    'v': 'abcdefghijklmnopqrstu wxyz',
#    'w': 'abcdefghijklmnopqrstuv xyz',
#    'x': 'abcdefghijklmnopqrstuvw yz',
#    'y': 'abcdefghijklmnopqrstuvwx z',
#    'z': 'abcdefghijklmnopqrstuvwxy '}
#
# Formatted in this fashion it's easy to see what data is being stored in
# the dict.
Labels:
dict,
pprint,
PrettyPrinter,
python,
string
Python - hash with md5 and sha1 (and others!)
# There are many reasons to hash data.
# For this example we'll say that we want to hash passwords so we can
# store them in a database (or file)... or, for this example, a variable.

# hashlib encapsulates the following functionality:
#   md5
#   sha1
#   sha224
#   sha256
#   sha384
#   sha512
import hashlib

# NOTE(review): a single unsalted SHA-1 is shown here only to demonstrate
# the hashlib API.  It is not adequate for storing real passwords; use a
# salted, deliberately slow scheme (e.g. hashlib.pbkdf2_hmac) instead.

# When a user creates their account they'll input a password.  For
# security purposes you hash the password and store it (so they can log
# into their account later).
password = "$uperP@a$$w0rd"

# Pass the password to the sha1 constructor.  Hash functions operate on
# bytes, so the text is encoded first (required on Python 3).
createSha1 = hashlib.sha1(password.encode("utf-8"))

# Dump the digest out as hexadecimal text.
sha1_password = createSha1.hexdigest()
print(sha1_password)
#output:
#   2d0b537e6673e1f6baf1c462cd4922dab32ee243

# You'll notice that sha1 creates a 40 character hash.
# All hashed strings (regardless of original size) will be represented by
# sha1 as 40 hexadecimal characters.
print(len(sha1_password))
#output:
#   40

# You can store that hashed password and then later the user will attempt
# to log in.  Take their password and hash it with the same algorithm
# (sha1 in our example).
password_attempts = [
    "superP@a$$w0rd",
    "$up3rP@a$$w0rd",
    "$uperP@a$$w0rd",
]

# Hash each attempt so it can be compared with the stored hash.
for number, attempt in enumerate(password_attempts, 1):
    attempt_hash = hashlib.sha1(attempt.encode("utf-8")).hexdigest()
    if sha1_password == attempt_hash:
        print("password attempt %d is a success" % number)
    else:
        print("password attempt %d is a failure" % number)

#output:
#   password attempt 1 is a failure
#   password attempt 2 is a failure
#   password attempt 3 is a success

# Now that you understand how to use sha1 you understand how to use all of
# the supported algorithms in hashlib.  They all use the same methods so
# you can easily adapt your code to any of the hash types.
# Check out the python docs for hashlib.
Wednesday, September 30, 2009
Python - regular expression backreference example
# Use the power of regular expressions.
# Bite the bullet and review the regular expression syntax:
# http://docs.python.org/library/re.html#regular-expression-syntax
import re

# Let's say you have created the next search engine.
# Your search engine extracts the contents of the <title></title> tags.
theString = """
<lots of garbage and
# what not and this title is going to be cool>
<myTitle> will be awesome. And once you get
<title>the title is here</title>
and then there is the end
"""

# Compile a regular expression to search for the contents of the title
# tag (this is where the regular expression syntax comes in handy).
# The one thing to certainly notice is the parentheses surrounding the
# contents of the title tag.  They form a capture group; once the search
# has run, the text the group matched can be referenced by number.
# The pattern is a raw string, "/" needs no escaping, and ".+?" is
# non-greedy so the match stops at the first closing tag.
p = re.compile(r'<title>(.+?)</title>')

# now search theString
m = p.search(theString)

# You can test whether or not your regular expression was successful:
# search() returns None when nothing matched.
if m:
    print("regular expression search successfull!")
    # group 1 is the text captured by the first pair of parentheses
    print("the title contents are: " + m.group(1))
    # group 0 is the entire regular expression match
    print("the entire regular expression returned: " + m.group(0))
else:
    print("regular expression search returns no results")

#output:
# regular expression search successfull!
# the title contents are: the title is here
# the entire regular expression returned: <title>the title is here</title>
Labels:
backreference,
compile,
group,
python,
re,
regular expressions,
search
Python - using yaml for configuration files
import yaml  # PyYAML is a third-party package: check it out and install it

# You should probably put this config in a separate file, but for this
# example it is just a multi-line string.
yamlConfigFile = """
cars:
    car0:
        type: toyota
        hp: 129
        mpg:
            city: 30
            highway: 35
        cost: 15,000
    car1:
        type: gm
        hp: 225
        mpg:
            city: 20
            highway: 25
        cost: 20,000
    car2:
        type: chevy
        hp: 220
        mpg:
            city: 22
            highway: 24
        cost: 21,000
"""

# The yaml document is converted to a dict; sub sections become nested
# dicts.  safe_load only builds plain Python data.  yaml.load without an
# explicit Loader can construct arbitrary objects from untrusted input and
# is rejected outright by current PyYAML releases.
theDict = yaml.safe_load(yamlConfigFile)
print(theDict)
# output (I added some tabs and what not so you could see the nested dict
# structure):
# {'cars':
#     {'car2':
#         {'mpg': {'city': 22, 'highway': 24},
#          'hp': 220,
#          'cost': '21,000',
#          'type': 'chevy'},
#      'car0':
#         {'mpg': {'city': 30, 'highway': 35},
#          'hp': 129,
#          'cost': '15,000',
#          'type': 'toyota'},
#      'car1':
#         {'mpg': {'city': 20, 'highway': 25},
#          'hp': 225,
#          'cost': '20,000',
#          'type': 'gm'}
#     }
# }

# To list the car entries (car0, car1, etc.):
print(list(theDict['cars']))
# output (order may differ):
# ['car2', 'car0', 'car1']

# To display the type and cost of the vehicles:
for c in theDict['cars']:
    car = theDict['cars'][c]
    print("%s cost: %s" % (car['type'], car['cost']))
# output:
# chevy cost: 21,000
# toyota cost: 15,000
# gm cost: 20,000

# Update the cost of the toyota.
theDict['cars']['car0']['cost'] = '25,000'
# The update is now in the dict representation of the yaml document.

# To dump the dict back out as yaml use the dump command.  It returns a
# multi-line string here, which you could write to a file.
print(yaml.dump(theDict))
# output (the exact layout of nested mappings depends on the PyYAML
# version):
# cars:
#   car0:
#     cost: 25,000
#     hp: 129
#     mpg: {city: 30, highway: 35}
#     type: toyota
#   car1:
#     cost: 20,000
#     hp: 225
#     mpg: {city: 20, highway: 25}
#     type: gm
#   car2:
#     cost: 21,000
#     hp: 220
#     mpg: {city: 22, highway: 24}
#     type: chevy
Tuesday, September 29, 2009
Python - simple regular expression examples
# Regular expressions are extremely powerful.
# Here are some simple examples to get you started.
import re

text = "Some example text to manipulate with regular expressions."

# Find the location of all the vowels.
# finditer returns an iterator over the match objects, so each vowel can
# be handled in turn.
for i in re.finditer('[aeiouy]', text):
    print("location: %d to %d" % (i.start(), i.end()))
    print("    found text was: " + text[i.start():i.end()])

# output:
#location: 1 to 2
#    found text was: o
#location: 3 to 4
#    found text was: e
#
# SNIP -- there are lots of vowels
#
#location: 49 to 50
#    found text was: e
#location: 52 to 53
#    found text was: i
#location: 53 to 54
#    found text was: o

# Use regular expressions to split your sentence into words.
sentence = "This is my example sentence"
for word in re.split(' ', sentence):
    print(word)

#Output:
# This
# is
# my
# example
# sentence

# Search and replace regular expression functionality:
# replace 'regular expression' with 're'.
text = "regular expression text goes here"
newText = re.sub('regular expression', 're', text)
print(newText)

#Outputs:
# re text goes here
Python - generate double dutch
# A simple double dutch generator: every vowel is doubled up with a "b"
# in between.
def createDoubleDutch(word):
    ''' create and return a double dutch version of word

    Each lowercase vowel v (a, e, i, o, u, y) becomes v + "b" + v; every
    other character, including uppercase vowels, is left untouched.
    '''
    for v in ("a", "e", "i", "o", "u", "y"):
        # double dutch-ize each vowel; the inserted text only adds "b" and
        # more copies of v itself, so later passes never re-expand it
        word = word.replace(v, v + "b" + v)
    return word


if __name__ == '__main__':
    # str.join builds the sentence in one pass instead of repeated +=.
    ddSentence = " ".join(
        createDoubleDutch(w)
        for w in "My sample sentence for double dutch".split(' '))
    print(ddSentence.strip())

#output:
# Myby sabamplebe sebentebencebe fobor doboubublebe dubutch
Monday, September 28, 2009
Python - pig latin generator
def makePigLatin(word):
    """ convert one word into pig latin

    Words shorter than three characters, and the word "the", are returned
    unchanged.  A word starting with a vowel gets "way" appended; otherwise
    everything before the first vowel moves to the end, followed by "ay".
    Only lowercase a, e, i, o, u and y count as vowels.
    """
    vowels = "a", "e", "i", "o", "u", "y"

    # short words are not converted
    if len(word) < 3 or word == "the":
        return word

    # Index of the earliest vowel.  A word without vowels uses its own
    # length, which leaves the word intact and just appends "ay".
    positions = [word.find(v) for v in vowels]
    found = [p for p in positions if p != -1]
    m = min(found) if found else len(word)

    if m == 0:
        return word + "way"
    return word[m:] + word[:m] + "ay"


sentence = "Hooray for pig latin"

# Convert each word of the sentence and glue the results back together.
pigLatinSentence = " ".join(makePigLatin(w) for w in sentence.split(' '))
print(pigLatinSentence.strip())

# output:
# oorayHay orfay igpay atinlay
Python - split paragraph into sentences with regular expressions
# Split up a paragraph into sentences using regular expressions.
import re

# To split on multiple characters a regular expression is easiest.  The
# pattern is compiled once at import time instead of on every call.
_SENTENCE_ENDERS = re.compile('[.!?]')


def splitParagraphIntoSentences(paragraph):
    ''' break a paragraph into sentences and return a list

    The paragraph is split on ".", "!" and "?".  The terminators themselves
    are dropped, each sentence is stripped of surrounding whitespace, and
    empty fragments (such as the one after the final terminator) are left
    out, so an empty paragraph yields an empty list.
    '''
    fragments = _SENTENCE_ENDERS.split(paragraph)
    return [s.strip() for s in fragments if s.strip()]


if __name__ == '__main__':
    p = """This is a sentence.
    This is an excited sentence!
    And do you think this is a question?"""
    sentences = splitParagraphIntoSentences(p)
    for s in sentences:
        print(s.strip())

#output:
# This is a sentence
# This is an excited sentence
# And do you think this is a question
Python - detect and label objects in images
Image to be analyzed
Detected Objects have now been outlined
from PIL import Image
# you'll need to get PIL
# some other (shorter) scripts that use PIL:
#   create a thumbnail with PIL
#   find the average image RGB
#   replace image colors with PIL
#
# this script is based on the find the sun script
# (its bright point collector is not needed here)


class TheOutliner(object):
    ''' takes a dict of xy points and
        draws a rectangle around them '''

    def __init__(self):
        self.outlineColor = 0, 255, 255
        self.pic = None
        self.picn = None
        self.minX = 0
        self.minY = 0
        self.maxX = 0
        self.maxY = 0

    def doEverything(self, imgPath, dictPoints, theoutfile):
        ''' load imgPath, outline the points in dictPoints
            and save the result to theoutfile '''
        self.loadImage(imgPath)
        self.loadBrightPoints(dictPoints)
        self.drawBox()
        self.saveImg(theoutfile)

    def loadImage(self, imgPath):
        ''' open the image and keep a pixel access object '''
        # the outline colour is an (r, g, b) tuple, so make
        # sure the pixels are RGB as well
        self.pic = Image.open(imgPath).convert("RGB")
        self.picn = self.pic.load()

    def loadBrightPoints(self, dictPoints):
        ''' iterate through all points and
            gather max/min x/y

            dictPoints is keyed by (x, y); the values are
            not used. raises IndexError when it is empty
            (there is nothing to outline)
        '''
        if not dictPoints:
            raise IndexError("dictPoints is empty: no points to outline")
        xs = [point[0] for point in dictPoints]
        ys = [point[1] for point in dictPoints]
        self.minX, self.maxX = min(xs), max(xs)
        self.minY, self.maxY = min(ys), max(ys)

    def drawBox(self):
        ''' draw the box around the gathered points '''
        # the + 1 makes the bars include maxX / maxY, so the
        # (maxX, maxY) corner is painted as well
        for x in range(self.minX, self.maxX + 1):
            # top bar
            self.picn[x, self.minY] = self.outlineColor
            # bottom bar
            self.picn[x, self.maxY] = self.outlineColor
        for y in range(self.minY, self.maxY + 1):
            # left bar
            self.picn[self.minX, y] = self.outlineColor
            # right bar
            self.picn[self.maxX, y] = self.outlineColor

    def saveImg(self, theoutfile):
        ''' write the (modified) image as a JPEG '''
        self.pic.save(theoutfile, "JPEG")


class ObjectDetector(object):
    ''' returns a list of dicts representing
        all the objects in the image

        NOTE: detection paints every visited pixel with the
        "evaluated" colour, so the loaded image is consumed
        by collectAllObjectPoints()
    '''

    def __init__(self):
        # sampling step in pixels (recomputed in loadImage)
        self.detail = 4
        # one dict of {(x, y): "go"} per detected object
        self.objects = []
        # an object must have more points than this to be kept
        self.size = 1000
        # channel value used to mark a pixel as evaluated
        self.no = 255
        # max per-channel difference to the seed pixel
        self.close = 100
        self.pic = None
        self.picn = None
        self.brightDict = {}

    def loadImage(self, imgPath):
        ''' open the image and derive detail/size from its dimensions '''
        # r, g, b unpacking below needs three channel pixels
        self.pic = Image.open(imgPath).convert("RGB")
        self.picn = self.pic.load()
        self.picSize = self.pic.size
        total = self.picSize[0] + self.picSize[1]
        # floor division keeps both values integers on python 3
        # each must be at least 1 -- and the larger
        # the self.detail is the faster the analysis will be
        self.detail = total // 2000 + 1
        self.size = total // 8 + 1

    def getSurroundingPoints(self, xy):
        ''' returns the eight adjoining points, self.detail
            pixels away from xy (they may lie outside the image) '''
        x = xy[0]
        y = xy[1]
        d = self.detail
        return (
            (x - d, y - d), (x, y - d), (x + d, y - d),
            (x - d, y), (x + d, y),
            (x - d, y + d), (x, y + d), (x + d, y + d),
        )

    def _inBounds(self, x, y):
        ''' True when (x, y) is a valid pixel position '''
        # checked explicitly instead of waiting for an IndexError:
        # NOTE(review): newer Pillow releases appear to accept
        # negative indices (counting from the far edge) -- confirm
        width, height = self.pic.size
        return 0 <= x < width and 0 <= y < height

    def getRGBFor(self, x, y):
        ''' pixel at (x, y); positions outside the image read as white '''
        if not self._inBounds(x, y):
            return 255, 255, 255
        return self.picn[x, y]

    def readyToBeEvaluated(self, xy):
        ''' False for positions outside the image and for
            pure white pixels, True otherwise '''
        if not self._inBounds(xy[0], xy[1]):
            return False
        r, g, b = self.picn[xy[0], xy[1]]
        return not (r == 255 and g == 255 and b == 255)

    def markEvaluated(self, xy):
        ''' paint the pixel with the evaluated colour;
            positions outside the image are ignored '''
        if self._inBounds(xy[0], xy[1]):
            self.picn[xy[0], xy[1]] = self.no, self.no, self.no

    def collectAllObjectPoints(self):
        ''' flood-fill from every sampled pixel that has not
            been evaluated yet and append each region with
            more than self.size points to self.objects '''
        width, height = self.pic.size
        # only every self.detail-th column / row is sampled
        for x in range(0, width, self.detail):
            for y in range(0, height, self.detail):
                r, g, b = self.picn[x, y]
                if r == self.no and g == self.no and b == self.no:
                    # already evaluated (or plain white)
                    continue
                ol = {(x, y): "go"}
                # stack of points that still need expanding
                pp = [(x, y)]
                while pp:
                    xe, ye = pp.pop()
                    # look for adjoining points
                    for p in self.getSurroundingPoints((xe, ye)):
                        if not self.readyToBeEvaluated(p):
                            continue
                        r2, g2, b2 = self.getRGBFor(p[0], p[1])
                        # neighbours are compared with the seed
                        # pixel (x, y), not with (xe, ye)
                        if abs(r - r2) < self.close and \
                           abs(g - g2) < self.close and \
                           abs(b - b2) < self.close:
                            # then its close enough
                            ol[p] = "go"
                            pp.append(p)
                            self.markEvaluated(p)
                    # done expanding that point
                    self.markEvaluated((xe, ye))
                if len(ol) > self.size:
                    self.objects.append(ol)


if __name__ == "__main__":
    import os

    print("Start Process")

    # assumes that the .jpg files are in
    # working directory
    for f in os.listdir('.'):
        # skip the *.out.jpg files written by earlier runs
        if not f.endswith(".jpg") or f.endswith(".out.jpg"):
            continue
        theFile = f
        print("working on " + theFile + "...")

        theOutFile = theFile + ".out.jpg"

        bbb = ObjectDetector()
        bbb.loadImage(theFile)
        print(" analyzing..")
        print(" file dimensions: " + str(bbb.picSize))
        print(" this files object weight: " + str(bbb.size))
        print(" this files analyzation detail: " + str(bbb.detail))
        bbb.collectAllObjectPoints()
        print(" objects detected: " + str(len(bbb.objects)))

        # the detector painted over its copy of the image,
        # so the outliner loads the file again
        drawer = TheOutliner()
        print(" loading and drawing rectangles..")
        drawer.loadImage(theFile)
        for o in bbb.objects:
            drawer.loadBrightPoints(o)
            drawer.drawBox()

        print("saving image...")
        drawer.saveImg(theOutFile)

        print("Process complete")

#output
#Start Process
#working on A Good Book to Have on Your Shelf.jpg...
# analyzing..
# file dimensions: (500, 667)
# this files object weight: 146
# this files analyzation detail: 1
# objects detected: 6
# loading and drawing rectangles..
#saving image...
#Process complete
#working on bamboo-forest.jpg...
# analyzing..
# file dimensions: (640, 480)
# this files object weight: 141
# this files analyzation detail: 1
# objects detected: 68
# loading and drawing rectangles..
#saving image...
#
# .............. SNIP .... (I had 20 jpeg files in the dir)
#
#working on Family_Photo.jpg...
# analyzing..
# file dimensions: (4200, 3300)
# this files object weight: 938
# this files analyzation detail: 4
# objects detected: 20
# loading and drawing rectangles..
#saving image...
#Process complete
Saturday, September 26, 2009
Python - sun image detector - outline objects in an image
The input:
Where oh where is the sun?
Where oh where is the sun?
from PIL import Image

# find brightest region of image
# and visually identify the region


class TheOutliner(object):
    ''' takes a dict of xy points and
        draws a rectangle around them '''

    def __init__(self):
        self.outlineColor = 0, 255, 255
        self.pic = None
        self.picn = None
        self.minX = 0
        self.minY = 0
        self.maxX = 0
        self.maxY = 0

    def doEverything(self, imgPath, dictPoints, theoutfile):
        ''' load imgPath, outline the points in dictPoints
            and save the result to theoutfile '''
        self.loadImage(imgPath)
        self.loadBrightPoints(dictPoints)
        self.drawBox()
        self.saveImg(theoutfile)

    def loadImage(self, imgPath):
        ''' open the image and keep a pixel access object '''
        # the outline colour is an (r, g, b) tuple, so make
        # sure the pixels are RGB as well
        self.pic = Image.open(imgPath).convert("RGB")
        self.picn = self.pic.load()

    def loadBrightPoints(self, dictPoints):
        ''' iterate through all points and
            gather max/min x/y

            dictPoints is keyed by (x, y); the values are
            not used. raises IndexError when it is empty
            (there is nothing to outline)
        '''
        if not dictPoints:
            raise IndexError("dictPoints is empty: no points to outline")
        xs = [point[0] for point in dictPoints]
        ys = [point[1] for point in dictPoints]
        self.minX, self.maxX = min(xs), max(xs)
        self.minY, self.maxY = min(ys), max(ys)

    def drawBox(self):
        ''' draw the box around the bright points '''
        # the + 1 makes the bars include maxX / maxY, so the
        # (maxX, maxY) corner is painted as well
        for x in range(self.minX, self.maxX + 1):
            # top bar
            self.picn[x, self.minY] = self.outlineColor
            # bottom bar
            self.picn[x, self.maxY] = self.outlineColor
        for y in range(self.minY, self.maxY + 1):
            # left bar
            self.picn[self.minX, y] = self.outlineColor
            # right bar
            self.picn[self.maxX, y] = self.outlineColor

    def saveImg(self, theoutfile):
        ''' write the (modified) image as a JPEG '''
        self.pic.save(theoutfile, "JPEG")


class CollectBrightPoints(object):
    ''' collects every pixel that is brighter than
        brightThreshold on all three channels '''

    def __init__(self):
        # per-channel (r, g, b) lower bounds, exclusive
        self.brightThreshold = 240, 240, 240
        self.pic = None
        self.picn = None
        # {(x, y): (r, g, b)} of the bright pixels found
        self.brightDict = {}

    def loadImage(self, imgPath):
        ''' open the image and keep a pixel access object '''
        # r, g, b unpacking below needs three channel pixels
        self.pic = Image.open(imgPath).convert("RGB")
        self.picn = self.pic.load()

    def collectBrightPoints(self):
        ''' scan the whole image and fill self.brightDict '''
        rMin, gMin, bMin = self.brightThreshold
        width, height = self.pic.size
        for x in range(width):
            for y in range(height):
                r, g, b = self.picn[x, y]
                if r > rMin and g > gMin and b > bMin:
                    # then it is brighter than our threshold
                    self.brightDict[x, y] = r, g, b


if __name__ == "__main__":
    print("Start Process")

    # assumes that four.jpg is in the
    # working directory
    theFile = "four.jpg"
    theOutFile = "four.output.jpg"

    cbp = CollectBrightPoints()
    cbp.loadImage(theFile)
    cbp.collectBrightPoints()
    brightDict = cbp.brightDict

    if brightDict:
        drawer = TheOutliner()
        drawer.doEverything(theFile, brightDict, theOutFile)
    else:
        # nothing passed the threshold, so there is no box to draw
        print("no bright points found -- nothing to outline")

    print("Process complete")
The output: The sun has been detected!
Subscribe to:
Posts (Atom)