# Split a large MySQL dump into smaller .sql files, one per table.
#
# I created a dumpfile from my mysql db and found that it was too large to
# upload to my new db host.  This script breaks up the database into smaller
# pieces that you can more easily import through phpmyadmin (if I'd only had
# shell access I wouldn't have this problem at all!).
import os

# indicates a new table is about to be created
dlmtr = "-- Table structure for table"


def _open_piece(out_dir, paths):
    """Open the next numbered output file and record its path in *paths*."""
    path = os.path.join(out_dir, str(len(paths)) + ".sql")
    paths.append(path)
    return open(path, "w")


def split_dump(dump_path, delimiter=dlmtr, out_dir="."):
    """Split the dump at every line that contains *delimiter*.

    Lines before the first delimiter go to ``0.sql``; every delimiter line
    starts the next numbered file (``1.sql``, ``2.sql``, ...) in *out_dir*.
    Both the input and the current output file are closed even if reading
    or writing fails part-way through.

    Returns the list of file paths written, in order.
    """
    paths = []
    o_file = None
    try:
        with open(dump_path) as whole_file:
            o_file = _open_piece(out_dir, paths)
            for line in whole_file:
                if delimiter in line:
                    # this is the seam for the next file
                    print("starting new file")
                    o_file.close()
                    o_file = _open_piece(out_dir, paths)
                o_file.write(line)
    finally:
        if o_file is not None:
            o_file.close()
    return paths


if __name__ == "__main__":
    split_dump("myDBDump.sql")
    print("Done")
A Python example-based blog that shows how to accomplish Python goals and how to correct Python errors.
Tuesday, October 27, 2009
Python - break a large mysql dump into small dumps
Python - extract a tar.gz archive
# Extract a tar.gz file with the tarfile module.
#
# Maybe this isn't so amazing for you types out there using *nix, os x, or
# (anything other than windows that comes with tar and gunzip scripts).
# However, if you find yourself on windows and need to extract a tar.gz
# you're in for quite the freeware/spyware/spamware gauntlet.
# Python has everything you need built in!  Hooray for python!
import tarfile


def extract_tar_gz(archive_path, dest="."):
    """Extract every member of the gzip-compressed tar file into *dest*.

    The archive is always closed, even when extraction fails.
    """
    # open the tarfile; the 'r:gz' mode enables gzip compression reading
    tfile = tarfile.open(archive_path, "r:gz")
    try:
        # 99.9% of the time you just want to extract all
        # the contents of the archive.
        if hasattr(tarfile, "data_filter"):
            # Interpreters that support extraction filters can refuse
            # members that would be written outside of dest.
            tfile.extractall(dest, filter="data")
        else:
            # NOTE: without filters, only extract archives you trust.
            tfile.extractall(dest)
    finally:
        tfile.close()


if __name__ == "__main__":
    extract_tar_gz("archive.tar.gz")
    print("Done!")
Monday, October 26, 2009
Python - extract or unzip a tar file
# Untar a tar file with python.
#
# python can open, inspect contents, and extract tar files with the
# built-in tarfile module.
import tarfile

# tar file to extract
theTarFile = 'example.tar'

# tar file path to extract
extractTarPath = '.'


def untar(tar_path, extract_path="."):
    """List and extract *tar_path* into *extract_path*.

    The file is checked with ``tarfile.is_tarfile`` *before* it is opened,
    so a non-tar file produces the message below instead of an exception
    from ``tarfile.open``.

    Returns True when the archive was extracted, False when *tar_path* is
    not a tar file.
    """
    if not tarfile.is_tarfile(tar_path):
        print(tar_path + " is not a tarfile.")
        return False

    # open the tar file
    tfile = tarfile.open(tar_path)
    try:
        # list all contents; list() prints the names itself and returns
        # None, so its result must not be printed again
        print("tar file contents:")
        tfile.list(verbose=False)

        # extract all contents
        if hasattr(tarfile, "data_filter"):
            # refuse members that would land outside of extract_path
            tfile.extractall(extract_path, filter="data")
        else:
            # NOTE: without filters, only extract archives you trust.
            tfile.extractall(extract_path)
    finally:
        tfile.close()
    return True


if __name__ == "__main__":
    untar(theTarFile, extractTarPath)
Labels:
extractall,
is_tarfile,
python,
tarfile
Tuesday, October 20, 2009
Python - reduce a web site's size
# I've recently started using iWeb (which for the non Mac OS X inclined is
# the application used to make web sites and what not).
#
# After creating a 80 page site I was horrified at the total site size.
# Nearly 100 MB!!  The site really wasn't very graphics intensive.  Each page
# had only one image on it at most!  I started examining the file structure
# and was horrified to realize that iWeb produces web sites that use huge
# image files.  Not even remotely compressed.
#
# I needed to rescale the jpg and png files down to reasonable sizes.
# Running this script reduced my website size from 96MB to 39MB!!
import glob
import os

# this is the default size for images:
size = 256, 256

# I provide 2 different sizes for other files that need to be larger.
# Identify the names of the files that need to be higher quality.
csize0files = 'PhotoGray_nav_bg.png', 'bg_round.jpg'
csize0 = 768, 768

# different custom sizes for other 'important' files
# NOTE(review): the flattened source is ambiguous about whether this
# assignment was commented out; the loop below needs the name, so it is
# defined here.
csize1files = 'nonefiles', 'none'
csize1 = 512, 512


def pick_scale_size(the_file):
    """Return the bounding (width, height) to use for a bare file name.

    Names listed in csize1files get csize1, names in csize0files get
    csize0, everything else gets the default size.
    """
    if the_file in csize1files:
        return csize1
    if the_file in csize0files:
        return csize0
    return size


def find_images(patterns=("*/*.jpg", "*/*.png")):
    """Return every path matching any glob pattern, in pattern order.

    For my page I just had jpg and png images one directory down.
    """
    all_matching_files = []
    for pattern in patterns:
        all_matching_files.extend(glob.glob(pattern))
    return all_matching_files


def resize_images(files):
    """Shrink each image in *files* in place; return how many were resized.

    Each file is overwritten: PNG for a ``.png`` extension (compared
    case-insensitively), JPEG for everything else.
    """
    # Imported here so the helpers above stay importable without PIL.
    from PIL import Image

    # Current Pillow releases no longer provide ANTIALIAS; LANCZOS is the
    # same filter.  Fall back to the old name for very old PIL installs.
    resample = getattr(Image, "LANCZOS", None)
    if resample is None:
        resample = Image.ANTIALIAS

    count = len(files)
    # loop through all the images and make changes
    for infile in files:
        im = Image.open(infile)
        the_file = os.path.basename(infile)
        extension = os.path.splitext(the_file)[1]

        # resize with PIL's awesome thumbnail method (custom size if the
        # file name asks for one)
        im.thumbnail(pick_scale_size(the_file), resample)

        # save back as appropriate type
        if extension.lower() == ".png":
            im.save(infile, "PNG")
        else:
            im.save(infile, "JPEG")

        count -= 1
        if count % 10 == 0:
            # output some useful stats
            print(str(count) + " images remaining.")
    return len(files)


if __name__ == "__main__":
    all_matching_files = find_images()
    # if you are using this for iWeb checkout the file count!
    # my 80 page site had 3000+ images!!!
    print("total images to be resized: " + str(len(all_matching_files)))
    resize_images(all_matching_files)
    print("....done")

## output:
## total images to be resized: 3907
## 3900 images remaining.
## 3890 images remaining.
## ... [snip].....(there were a lot!)
## 10 images remaining.
## 0 images remaining.
## ....done

# Certainly there is still room for improvement; future posts will ideally
# be aimed at further efficiency gains.
Monday, October 19, 2009
Python - quickly update urls in a web page
# Update a web page's url references.
#
# Your web page has a collection of images and you want to update all the
# "folder1" references to the new folder you've populated called "newfolder".
# Normally you would open the actual html file and iterate through the file
# one line at a time, but for simplicity's sake lets just use a multi line
# string for the example.
theString = """
<img src="folder1/pic2324.jpg" />
<img src="folder1/pic2255.png" />
<img src="folder2/pic552.jpg" />
<img src="folder1/pica2f.jpg" />
"""


def update_folder_refs(text, old="folder1/", new="newfolder/"):
    """Return *text* with every occurrence of *old* replaced by *new*.

    The replacement is applied line by line; line breaks are preserved.
    """
    # all you need is to iterate through the lines and replace
    # 'folder1/' with 'newfolder/'
    return "\n".join(line.replace(old, new) for line in text.split("\n"))


if __name__ == "__main__":
    for line in update_folder_refs(theString).split("\n"):
        print(line)

# output:
# <img src="newfolder/pic2324.jpg" />
# <img src="newfolder/pic2255.png" />
# <img src="folder2/pic552.jpg" />
# <img src="newfolder/pica2f.jpg" />
Tuesday, October 6, 2009
Python - read robots.txt files with ease
# The robot parser allows access to a website's robots.txt file.
#
# Note: in python 3 robotparser is found in the urllib package at
# urllib.robotparser; in python 2 it is a top-level module.
try:
    from urllib import robotparser  # Python 3
except ImportError:
    import robotparser  # Python 2

# the site I want to read
url = "http://pythonicprose.blogspot.com/robots.txt"


def load_robots(robots_url):
    """Download and parse the robots.txt at *robots_url*; return the parser."""
    rob = robotparser.RobotFileParser()
    rob.set_url(robots_url)
    # read and parse through the file (this performs the network request)
    rob.read()
    return rob


def check_paths(rob, paths, useragent="*"):
    """Return ``{path: allowed}`` for *useragent* using the parsed rules in *rob*."""
    return dict((path, rob.can_fetch(useragent, path)) for path in paths)


if __name__ == "__main__":
    rob = load_robots(url)

    # if you are creating a web crawler or spider you may need to keep
    # track of how long it has been since you last read the robots.txt file;
    # use modified to mark the time and mtime to read it
    rob.modified()
    rob.mtime()

    allowed = check_paths(rob, ["/", "/search"])

    # check and see if any user agent can read the home page
    print(allowed["/"])
    # output:
    # True

    # check and see if any user agent can read the search page
    print(allowed["/search"])
    # output:
    # False

    # now that we are so many lines down from set_url we can check
    # the host we are processing
    print(rob.host)
    # output:
    # 'pythonicprose.blogspot.com'
Sunday, October 4, 2009
Python - make your own class attributes iterable
# It can be useful to iterate through data contained
# in your own custom objects.


# Lets say you have your own class
class ExampleClass(object):
    """Container with a list and a dict that can be looped over directly."""

    def __init__(self):
        self.objectList = []
        self.objectDict = {}
        self.maxItem = 100
        self.objectItem = ""

    def __iter__(self):
        """Iterate over the list items, so ``for item in instance`` works."""
        return iter(self.objectList)

    def iterateList(self):
        """Return the underlying list of items (the list itself, not a copy)."""
        return self.objectList

    def addListItem(self, item):
        """Append *item* to the list."""
        self.objectList.append(item)

    def addDictItem(self, item, value):
        """Store *value* in the dict under the key *item*."""
        self.objectDict[item] = value


if __name__ == "__main__":
    # create an instance of the class
    # and lets use its iterating methods
    ec = ExampleClass()

    # add some example data
    for i in range(10):
        ec.addListItem(i)
        ec.addDictItem(i, str(i) + "'s value")

    # now that we have data lets iterate through the data
    for item in ec.iterateList():
        print(item)

# output:
# 0
# 1
# 2
# 3
# 4
# 5
# 6
# 7
# 8
# 9
Saturday, October 3, 2009
Python - using sqlite3 module for persistent data
# The sqlite3 module lets you create and use
# a database with just a file.
import os
import sqlite3

# example rows: (name, age, email)
USERS = [
    ('steve', '30', 'blah@blah.com'),
    ('steve2', '32', 'blah@blah2.com'),
    ('steve3', '33', 'blah@blah3.com'),
]


def build_users_db(db_path, users=USERS):
    """(Re)create the ``users`` table in *db_path* and return all its rows.

    If you run through this example a few times you may notice that the data
    is persistent, so the table is dropped first to start from ground zero.
    The inserts are committed before the connection is closed; without the
    commit they would be discarded and nothing would persist.
    """
    # connect to (and create, if needed) the database file
    conn = sqlite3.connect(db_path)
    try:
        # grab a cursor and we can create the db schema
        c = conn.cursor()
        c.execute('drop table if exists users')
        c.execute('create table users (name text, age text, email text)')

        # insert data; placeholders keep values out of the SQL text
        c.executemany('insert into users values (?, ?, ?)', users)
        conn.commit()

        # now lets select our data
        c.execute('select * from users')
        return c.fetchall()
    finally:
        conn.close()


if __name__ == "__main__":
    # in this example the db file lives in the current working dir;
    # os.path.join supplies the path separator between dir and file name
    db_file = os.path.join(os.path.abspath('.'), "tempdb")

    # iterate through the results with for each
    for row in build_users_db(db_file):
        print(row)

# output (Python 2 shows the u'' prefix on text values):
# (u'steve', u'30', u'blah@blah.com')
# (u'steve2', u'32', u'blah@blah2.com')
# (u'steve3', u'33', u'blah@blah3.com')
Python - create unit tests and ensure accurate documentation with doctest
"""
The doctest module uses class and method documentation
to run unit tests on your code.

The doctest module reads the coding documentation you've
created and uses that same documentation to conduct unit tests.
This helps ensure the documentation is accurate and creates
a one stop destination for documentation and unit tests.
"""


class ExampleClass(object):
    """
    Example class that has one working method.

    >>> ec = ExampleClass()
    >>> ec.example(10)
    19

    >>> ec = ExampleClass()
    >>> ec.example(0)
    -1

    Non-int parameters should return nothing:

    >>> ec = ExampleClass()
    >>> ec.example("apple")

    >>> ec = ExampleClass()
    >>> ec.a = 3
    >>> ec.example(10)
    29
    """

    def __init__(self):
        self.a = 2
        self.b = -1

    def example(self, n):
        """Return ``int(n) * self.a + self.b``, or None if *n* is not int-like."""
        try:
            n = int(n)
        except (TypeError, ValueError):
            # ValueError: text such as "apple"; TypeError: None, lists, ...
            return None
        return n * self.a + self.b


if __name__ == '__main__':
    import doctest
    doctest.testmod()

# this outputs:
#   nothing at all
# if no errors are found then doctest doesn't complain.
#
# If you were to change some of the expected values in the
# documentation....for instance... the last example:
#   >>> ec = ExampleClass()
#   >>> ec.a = 3
#   >>> ec.example(10)
#   29
# and change the expected response from 29 to 30
#
# the output would be:
# **********************************************************************
# File "C:\python\docstringexample.py", line 27, in __main__.ExampleClass
# Failed example:
#     ec.example(10)
# Expected:
#     30
# Got:
#     29
# **********************************************************************
# 1 items had failures:
#    1 of   9 in __main__.ExampleClass
# ***Test Failed*** 1 failures.
Labels:
class,
doctest,
documentation,
python,
unit test
Python - storing persistent objects in a file with shelve
# The shelve module is used to store objects in a file.
# You use the file like a glorified dict with key, value pairs.
import contextlib
import shelve

filename = 'shelveFile.shelve'


# Here is an example class we'll create
# instances of and then store in the file
class ExampleClass(object):
    """Plain record with three addends (a, b, c) and an id-like field k."""

    def __init__(self):
        self.a = 0
        self.b = 1
        self.c = 2
        self.k = 0

    def getTotal(self):
        """Return a + b + c."""
        return self.a + self.b + self.c


def make_objects(count=3):
    """Create *count* instances with k=i, a=i+1, b=i+2, c=i+3."""
    objs = []
    for i in range(count):
        obj = ExampleClass()
        obj.k = i
        obj.a = i + 1
        obj.b = i + 2
        obj.c = i + 3
        objs.append(obj)
    return objs


def save_objects(path, objs):
    """Store each object in the shelve file at *path* under ``str(obj.k)``.

    Closing the shelf also syncs it, so the objects are on disk on return.
    """
    # open and or create the file
    with contextlib.closing(shelve.open(path)) as db:
        for obj in objs:
            # keys are strings
            db[str(obj.k)] = obj


def read_objects(path):
    """Return an ``(a, b, c, k)`` tuple for every object stored at *path*.

    The order follows the shelf's key order, which is not guaranteed.
    """
    with contextlib.closing(shelve.open(path)) as db:
        rows = []
        for key in db.keys():
            j = db[key]
            rows.append((j.a, j.b, j.c, j.k))
        return rows


def set_all_a(path, value):
    """Set attribute ``a`` to *value* on every object stored at *path*."""
    with contextlib.closing(shelve.open(path)) as db:
        # snapshot the keys: entries are reassigned inside the loop
        for key in list(db.keys()):
            # Take note of how these changes are made.  You cannot merely
            # alter an attribute like db[key].a = 7 (this will not work):
            # fetch the object, change it, then store it back.
            j = db[key]
            j.a = value
            db[key] = j


def _show(rows):
    """Print one 'a,b,c,k' line per row."""
    for row in rows:
        print("a,b,c,k =  %s %s %s %s" % row)


if __name__ == "__main__":
    save_objects(filename, make_objects())

    # Now we'll reopen and verify the data is there
    _show(read_objects(filename))
    # output:
    # a,b,c,k = 1 2 3 0
    # a,b,c,k = 3 4 5 2
    # a,b,c,k = 2 3 4 1

    # You can edit these values.
    # Here we change all 'a' attributes to 7
    set_all_a(filename, 7)

    # And verify that changes are made:
    _show(read_objects(filename))
    # output:
    # a,b,c,k = 7 2 3 0
    # a,b,c,k = 7 4 5 2
    # a,b,c,k = 7 3 4 1
Thursday, October 1, 2009
Python - using filecmp to compare two or more files
# The filecmp module is a portable way to check
# whether two (or more) files are the same.
# The two functions used here are:
#   cmp(file1, file2)
#   cmpfiles(directory1, directory2, common)
import filecmp


def files_are_same(path_a, path_b):
    """Return True when the two files have identical contents.

    ``shallow=False`` makes filecmp compare the file contents; the default
    shallow mode treats files with matching os.stat() signatures as equal
    without reading them.
    """
    return filecmp.cmp(path_a, path_b, shallow=False)


def compare_dirs(dir_a, dir_b, common):
    """Compare the file names in *common* between two directories.

    Returns three lists: (matches, mismatches, errors).  Names that cannot
    be compared (for example missing from one side) end up in errors.
    """
    return filecmp.cmpfiles(dir_a, dir_b, common, shallow=False)


if __name__ == "__main__":
    # check whether two files are the same
    # of course you need to have a /etc/hosts and .bak
    # for this example to work (feel free to change to filenames
    # that do exist on your setup)
    if files_are_same('/etc/hosts', '/etc/hosts.bak'):
        print("the files are the same")
    else:
        print("the files are not the same")

    # filecmp also allows you to compare directories
    # you can use cmpfiles which returns 3 lists:
    # matches, mismatch, error
    match, mismatch, error = compare_dirs(
        'folder1', 'folder2', ['LICENSE.TXT', 'README.TXT', 'VERSION'])

    print("Matching:  %s" % (match,))
    print("Mismatched:  %s" % (mismatch,))
    print("Errors:  %s" % (error,))

# output (for me):
# Matching: ['LICENSE.TXT', 'README.TXT']
# Mismatched: ['VERSION']
# Errors: []
Python - copy or move files and directories
# shutil is used for high level copy and move needs.
# shutil can operate on individual files or recursively
# on a directory structure.
import shutil


def copy_file_with_stats(src, dst):
    """Copy the file *src* to *dst*, then copy its stat info as well.

    shutil.copy copies the file's data; shutil.copystat is then used to
    carry the stat information (such as timestamps) from *src* over to the
    new copy.  Returns *dst*.
    """
    print("just copy file contents...")
    shutil.copy(src, dst)

    # to copy the stats from the first to the new copied
    # file you can use the copystat method
    print("copy stats...")
    shutil.copystat(src, dst)
    return dst


def copy_then_remove_tree(src_dir, dst_dir):
    """Recursively copy *src_dir* to *dst_dir*, then delete the copy again.

    *dst_dir* must not exist yet; copytree creates it.
    """
    # shutil can copy an entire directory tree with copytree
    print("recursively copy directory tree...")
    shutil.copytree(src_dir, dst_dir)

    # shutil can also remove a directory tree
    print("remove the copied directory tree...")
    shutil.rmtree(dst_dir)


if __name__ == "__main__":
    # of course in these examples the file is in
    # the current working directory
    copy_file_with_stats("hero.bmp", "hero2.bmp")
    copy_then_remove_tree('C:/tmp', 'C:/newtmp')
Python - using glob to get lists of files and directories
import os.path

# glob is a simple and useful python module.
# It uses shell-style wildcards (not regular expressions) to match
# directories and files for a given path.  If you've ever used the command
# line to 'ls' or 'dir' the current directory you may be aware that it
# accepts * or ? or [] to match patterns.  glob is a python implementation
# of this functionality.
import glob
import os


def describe_paths(pattern):
    """Yield '<path> is a file.' or '<path> is not a file.' per match.

    Uses glob.iglob, which returns matches as it goes and does not load
    everything into memory first -- handy when you are expecting a great
    deal of results.
    """
    for file_name_and_path in glob.iglob(pattern):
        # since glob gives you the path you can use the output
        # with some of the os module's methods
        if os.path.isfile(file_name_and_path):
            yield file_name_and_path + " is a file."
        else:
            yield file_name_and_path + " is not a file."


if __name__ == "__main__":
    # find all the .txt files in the current working directory
    print(glob.glob('*.TXT'))
    # output:
    # ['LICENSE.txt', 'NEWS.txt', 'README.txt']

    # you can also specify a full path (raw strings keep the backslashes
    # literal).  Here I'm searching for dll files in python 2.6
    print(glob.glob(r'C:\Python26\DLLs\*.dll'))
    # output:
    # ['C:\\Python26\\DLLs\\sqlite3.dll',
    #  'C:\\Python26\\DLLs\\tcl85.dll',
    #  'C:\\Python26\\DLLs\\tclpip85.dll',
    #  'C:\\Python26\\DLLs\\tk85.dll']

    # glob.iglob() example
    for description in describe_paths(r'C:\Python26\Lib\*'):
        print(description)

# output (snipped a bit...since there are a lot):
# C:\Python26\Lib\abc.py is a file.
# ....[snip]
# C:\Python26\Lib\compiler is not a file.
# ...[another snip]
# C:\Python26\Lib\getopt.py is a file.
# C:\Python26\Lib\getopt.pyc is a file.
# C:\Python26\Lib\getpass.py is a file.
# C:\Python26\Lib\gettext.py is a file.
# C:\Python26\Lib\glob.py is a file.
# C:\Python26\Lib\glob.pyc is a file.
Python - printing complex objects with pretty printing
# Pretty printing (using the pprint module) transforms
# python objects into human readable output.
#
# Use pprint when you need to display a complex
# data structure to users.
import pprint
import string


def build_gap_alphabets():
    """Map each lowercase letter to the alphabet with that letter blanked.

    For example 'c' maps to 'ab defghijklmnopqrstuvwxyz'.
    """
    d = {}
    for letter in string.ascii_lowercase:
        d[letter] = string.ascii_lowercase.replace(letter, ' ')
    return d


if __name__ == "__main__":
    d = build_gap_alphabets()

    print("not useful output:")
    print(d)
    # output (one very long line):
    # not useful output:
    # {'a': ' bcdefghijklmnopqrstuvwxyz', 'c': 'ab defghijklmnopqrstuvwxyz',
    #  'b': 'a cdefghijklmnopqrstuvwxyz', ... 'z': 'abcdefghijklmnopqrstuvwxy '}
    #
    # All the data is there but it is difficult to read.
    # You can use pprint (pretty print) to make things easy to read.  pprint
    # formats python data structures to be human readable.

    print("human readable output:")
    pprint.pprint(d, indent=4)
    # output:
    # human readable output:
    # {   'a': ' bcdefghijklmnopqrstuvwxyz',
    #     'b': 'a cdefghijklmnopqrstuvwxyz',
    #     'c': 'ab defghijklmnopqrstuvwxyz',
    #     'd': 'abc efghijklmnopqrstuvwxyz',
    #     ... [snip] ...
    #     'y': 'abcdefghijklmnopqrstuvwx z',
    #     'z': 'abcdefghijklmnopqrstuvwxy '}
    #
    # Formatted in this fashion it's easy to see what
    # data is being stored in the dict.
Labels:
dict,
pprint,
PrettyPrinter,
python,
string
Python - hash with md5 and sha1 (and others!)
# There are many reasons to hash data.
# For this example we'll say that we want to hash passwords so we can store
# them in a database (or file)...or for this example a variable.
#
# hashlib encapsulates the following functionality:
#   md5, sha1, sha224, sha256, sha384, sha512
#
# NOTE: a single unsalted SHA-1 is shown here only to demonstrate the
# hashlib API.  For real password storage use a salted, deliberately slow
# scheme instead.
import hashlib


def sha1_hex(text):
    """Return the 40-character hexadecimal SHA-1 digest of *text*.

    hashlib works on bytes, so text strings are encoded as UTF-8 first;
    byte strings are hashed as they are.
    """
    if not isinstance(text, bytes):
        text = text.encode("utf-8")
    # pass the data to the sha1 constructor and dump the hash out as text
    return hashlib.sha1(text).hexdigest()


def check_password(stored_hash, attempt):
    """Return True when *attempt* hashes to *stored_hash*."""
    return stored_hash == sha1_hex(attempt)


if __name__ == "__main__":
    # When a user creates their account they'll input a password.
    # You hash the password and store it (so they can log into
    # their account later).
    password = "$uperP@a$$w0rd"
    sha1_password = sha1_hex(password)
    print(sha1_password)
    # output:
    # 2d0b537e6673e1f6baf1c462cd4922dab32ee243

    # You'll notice that sha1 creates a 40 character hash.
    # All hashed strings (regardless of original size) will
    # be represented by sha1 as 40 characters.
    print(len(sha1_password))
    # output:
    # 40

    # You can store that hashed password and then later the user will
    # attempt to login.  Take their password and hash it with the same
    # algorithm (sha1 in our example) so you can compare passwords.
    attempts = ["superP@a$$w0rd", "$up3rP@a$$w0rd", "$uperP@a$$w0rd"]
    for number, attempt in enumerate(attempts):
        if check_password(sha1_password, attempt):
            print("password attempt %d is a success" % (number + 1))
        else:
            print("password attempt %d is a failure" % (number + 1))
    # output:
    # password attempt 1 is a failure
    # password attempt 2 is a failure
    # password attempt 3 is a success

# Now that you understand how to use sha1 you understand how to use all of
# the supported algorithms in hashlib.  They all use the same methods so you
# can easily adapt your code to any of the hash types.
# Check out the python docs for hashlib.
Subscribe to:
Posts (Atom)