Showing posts with label glob. Show all posts
Showing posts with label glob. Show all posts

Wednesday, February 24, 2010

Python - Bulk rename a directory of files

 
# I found myself needing to rename my video
# collection.  For some reason I decided that using
# spaces in a file name is lame... underscores should
# always be used.  After manually renaming two or three
# files I decided that Python could do all the
# work for me.
 
import os
import glob
 
# My video collection is all Matroska files, so
# every file has the .mkv extension.
files_to_change = '*.mkv'

# new and old versions of a space
lame_space = ' '
cool_space = '_'


def rename_spaces(pattern, old=' ', new='_'):
    """Rename every file matching *pattern*, swapping *old* for *new*.

    Only the base name of each match is rewritten, so a directory
    component in *pattern* is never altered.  Files whose name does not
    contain *old* are left alone, and a file is skipped (with a message)
    when the new name is already taken, because os.rename would
    otherwise silently replace the existing file on POSIX systems.

    Returns a list of (old_path, new_path) tuples for the files that
    were actually renamed.
    """
    renamed = []
    # use glob to gather a list of matching files
    for path in glob.glob(pattern):
        folder, name = os.path.split(path)
        new_name = name.replace(old, new)
        if new_name == name:
            # nothing to replace in this file name
            continue
        target = os.path.join(folder, new_name)
        if os.path.exists(target):
            print('skipping:  %s  ->  %s (target already exists)'
                  % (path, target))
            continue
        # add a little status for instant gratification
        print('renaming:  %s  ->  %s' % (path, target))
        os.rename(path, target)
        renamed.append((path, target))
    return renamed


rename_spaces(files_to_change, lame_space, cool_space)

# single-argument print(...) works on both Python 2 and Python 3
print('All Done')
 
 
## my output: 
# 
# renaming:  Me at home.mkv  ->  Me_at_home.mkv 
# renaming:  Max in kid pool.mkv  ->  Max_in_kid_pool.mkv 
# ........ < and so on > 
# All Done 
 
 

Tuesday, October 20, 2009

Python - reduce a web sites size

# I've recently started using iWeb (which for the non
# Mac OS X inclined is the application used to make web

# sites and what not).
#
# After creating an 80 page site I was horrified at the
# total size of the site.  Nearly 100 MB!! The site really wasn't
# very graphics intensive.  Each page had only one image on it
# at most!  I started examining the file structure and was
# horrified to realize that the web sites iWeb produces use
# huge image files.  Not even remotely compressed.

#
# I needed to rescale the jpg and png files down to
# reasonable sizes.


from PIL import Image
import glob

import os

# this is the default size for images:
size = 256, 256

# I provide 2 different sizes for other files
# that need to be larger.
# Identify the names of the files that need to
# be higher quality.
csize0files = 'PhotoGray_nav_bg.png', 'bg_round.jpg'

csize0 = 768, 768

# different custom sizes for other 'important' files
#
csize1files = 'nonefiles', 'none'
csize1 = 512, 512


def scale_size_for(file_name):
    """Return the (width, height) bounding box to shrink *file_name* into.

    *file_name* is the bare file name (no directory).  Names listed in
    csize1files get csize1, names listed in csize0files get csize0 and
    everything else gets the default size.  csize1files is checked
    first, so it wins if a name appears in both tuples.
    """
    if file_name in csize1files:
        return csize1
    if file_name in csize0files:
        return csize0
    return size


def resample_filter():
    """Return the high-quality downsampling filter of the installed PIL.

    Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the
    current name for the same filter.  Older PIL releases that only
    know ANTIALIAS are still supported through the fallback.
    """
    try:
        return Image.LANCZOS
    except AttributeError:
        return Image.ANTIALIAS


# create a list of all the files, type by type.
# For my page I just had jpg and png images
all_matching_files = glob.glob("*/*.jpg") + glob.glob("*/*.png")

# if you are using this for iWeb checkout the file count!
# my 80 page site had 3000+ images!!!
print("total images to be resized: " + str(len(all_matching_files)))
count = len(all_matching_files)

# loop through all the images and make changes
for infile in all_matching_files:
    im = Image.open(infile)
    # only the bare file name and its extension are needed
    theFile = os.path.basename(infile)
    extension = os.path.splitext(theFile)[1]

    # resize in place with PIL's awesome thumbnail method
    # (custom bounding box for the 'important' files)
    im.thumbnail(scale_size_for(theFile), resample_filter())

    # save back over the original as the appropriate type
    if extension == ".png":
        im.save(infile, "PNG")
    else:
        im.save(infile, "JPEG")

    count -= 1
    if count % 10 == 0:
        # output some useful stats
        print(str(count) + " images remaining.")

print("....done")

## output:
##    total images to be resized: 3907

##    3900 images remaining.
##    3890 images remaining.
##    ... [snip].....(there were a lot!)
##    30 images remaining.
##    20 images remaining.
##    10 images remaining.
##    0 images remaining.
##    ....done

# Running this script reduced my website size
# from 96MB to 39MB!!
#
# Certainly there is still room for improvement;
# future posts will ideally be aimed at further
# efficiency gains.


Thursday, October 1, 2009

Python - using glob to get lists of files and directories

import os.path
# glob is a simple and useful python module.
# It uses Unix shell-style wildcards (not regular
# expressions) to match directories and files for a
# given path.  If you've ever used the command line
# to 'ls' or 'dir' the current directory you may be
# aware that the command accepts * or ? or [] to
# match patterns.  glob is a python implementation
# of this functionality.
 
import glob
import os
 
# find all the .txt files in the current working directory  
# (lower-case pattern: matching is case-sensitive on POSIX systems,
# and the files listed below all end in lower-case .txt)
print(glob.glob('*.txt'))
# output:
# ['LICENSE.txt', 'NEWS.txt', 'README.txt']

# you can also specify a full path
# Here I'm searching for dll files in python 2.6
# (raw string so the backslashes are never read as escape sequences)
print(glob.glob(r'C:\Python26\DLLs\*.dll'))
# output:
#    ['C:\\Python26\\DLLs\\sqlite3.dll',
#    'C:\\Python26\\DLLs\\tcl85.dll',
#    'C:\\Python26\\DLLs\\tclpip85.dll',
#    'C:\\Python26\\DLLs\\tk85.dll']


def describe_path(path):
    """Return *path* followed by ' is a file.' or ' is not a file.'.

    since glob gives you the full path you can
    use the output with some of the os module's methods
    """
    if os.path.isfile(path):
        return path + " is a file."
    return path + " is not a file."


# If you are expecting a great deal of results
# you should use the glob.iglob method that returns
# matches as it goes and does not load everything
# into memory first.
# glob.iglob() example
f = glob.iglob(r'C:\Python26\Lib\*')

# a for loop consumes the iterator and stops by itself when it is
# exhausted, so no explicit next()/StopIteration handling is needed
for fileNameAndPath in f:
    print(describe_path(fileNameAndPath))
 
#output (snipped a bit...since there are a lot):
#    C:\Python26\Lib\abc.py is a file. 
#    ....[snip] 
#    C:\Python26\Lib\compiler is not a file. 
#    ...[another snip] 
#    C:\Python26\Lib\getopt.py is a file. 
#    C:\Python26\Lib\getopt.pyc is a file. 
#    C:\Python26\Lib\getpass.py is a file. 
#    C:\Python26\Lib\gettext.py is a file. 
#    C:\Python26\Lib\glob.py is a file. 
#    C:\Python26\Lib\glob.pyc is a file.