Pythonic Prose: os

Showing posts with label os. Show all posts

Friday, August 20, 2010

Python - limit cpu percentage for script

# It is sometimes useful to monitor how much cpu time or 
# cpu percentage your script is consuming. 
# This script will limit the cpu usage of your script 
 
# This example demonstrates how to calculate the system 
# and user cpu time and cpu percentage 
 
# Note: this example is in python 3.0 
#   however, it is easily ported back to 2.x 
#   by replacing print() with print 
 
import os
import time
 
def getPercentage(unew, uold, start):
    """
    Return the cpu time spent between two readings as a percentage
    of the wall-clock time that has passed since `start`.

    unew, uold -- newer and older cpu time readings (seconds)
    start      -- time.time() stamp the elapsed time is counted from
    """
    cpu_used = float(unew) - float(uold)
    wall_elapsed = time.time() - float(start)
    return 100 * cpu_used / wall_elapsed
 
def looper(timeCount, percentageGoal):
    """
    Loop over many small tasks and keep the average user cpu
    percentage of the script at or below a desired level.

    timeCount      -- seconds of wall-clock time to keep working
    percentageGoal -- highest average user cpu percentage allowed

    The percentage is measured from the moment the function starts,
    so it is an average over the whole run, not an instantaneous value.
    """
    start = time.time()
    time.sleep(0.1)
    # baseline reading; the first field of os.times() is the user cpu time
    uold = os.times()[0]
    percentage = 0.0
    keepLooping = True
    while keepLooping:
        print("user %", percentage)

        # This just toggles to stop looping
        # when a time has been reached. In a real
        # script you would check for more work and
        # toggle off when there is no more work to
        # be done.
        if time.time() - start > timeCount:
            keepLooping = False

        # do work:
        #   In order for this script to actually help limit
        #   the cpu usage you would need to break your script into
        #   sections.
        #   For example: if you were going to iterate through a large
        #       list of data and perform actions on the contents
        #       of the list then you should perform one action here
        #       and keep looping through until all the actions
        #       are accomplished.
        #
        # in this case we're just eating cpu so we get some numbers
        print("do work...")
        for i in range(1, 1000000):
            b = 8*342*i*234

        # tone back cpu usage: read the cpu time again *after* the work
        # so the chunk that just ran is included in the percentage, then
        # sleep until the running average drops back under the goal
        while True:
            unew = os.times()[0]
            percentage = getPercentage(unew, uold, start)
            if percentage <= percentageGoal:
                break
            time.sleep(0.1)
 
if __name__ == '__main__':
    # demo run: keep working for 4 seconds while holding the
    # average cpu usage under 30%
    work_seconds, cpu_goal = 4, 30
    looper(work_seconds, cpu_goal)
 
## my output: 
##      user % 0.0 
##      do work... 
##      user % 0.0 
##      do work... 
##      user % 29.6673831301 
##      do work... 
##      user % 29.1137166495 
##      do work... 
##      user % 29.7617156875 
##      do work... 
##      user % 29.5707887319 
##      do work... 
##      user % 29.8122197706 
##      do work... 
##      user % 29.3053848216 
##      do work... 
##      user % 29.9385051866 
##      do work...

Wednesday, February 24, 2010

Python - Bulk rename a directory of files

 
# I found myself with the need to rename my video 
# collection.  For some reason I decided that using 
# spaces in a file name is lame....underscores should 
# always be used.  After two or three files of manually 
# renaming I decided that python could do all the 
# work for me. 
 
import os
import glob
 
# My video collection is all matroska files.  So
# the extension of them all is *.mkv format.
files_to_change = '*.mkv'

# new and old versions of a space
lame_space = ' '
cool_space = '_'

# use glob to gather a list of matching files
for f in glob.glob(files_to_change):
    f2 = f.replace(lame_space, cool_space)
    if f2 == f:
        # no spaces in this name, so there is nothing to rename
        continue
    if os.path.exists(f2):
        # never overwrite a file that already has the new name
        print('skipping:  %s  (%s already exists)' % (f, f2))
        continue
    # add a little status for instant gratification
    # (a single formatted string prints the same on python 2 and 3)
    print('renaming:  %s  ->  %s' % (f, f2))
    os.rename(f, f2)

print('All Done')
 
 
## my output: 
# 
# renaming:  Me at home.mkv  ->  Me_at_home.mkv 
# renaming:  Max in kid pool.mkv  ->  Max_in_kid_pool.mkv 
# ........ < and so on > 
# All Done

Tuesday, October 20, 2009

Python - reduce a web site's size

# I've recently started using iWeb (which for the non
# Mac OS X inclined is the application used to make web

# sites and what not).
#
# After creating an 80 page site I was horrified at the
# total size of the site.  Nearly 100 MB!! The site really wasn't
# very graphics intensive.  Each page had only one image on it
# at most!  I started examining the file structure and was
# horrified to realize that the web sites iWeb produces use
# huge image files.  Not even remotely compressed.

#
# I needed to rescale the jpg and png files down to
# reasonable sizes.


from PIL import Image
import glob

import os

# this is the default size for images:
size = 256, 256

# I provide 2 different sizes for other files
# that need to be larger.
# Identify the names of the files that need to
# be higher quality.
csize0files = 'PhotoGray_nav_bg.png', 'bg_round.jpg'
csize0 = 768, 768

# different custom sizes for other 'important' files
csize1files = 'nonefiles', 'none'
csize1 = 512, 512

# Newer Pillow releases dropped Image.ANTIALIAS; LANCZOS is the same
# filter under its current name.  Fall back to ANTIALIAS for old PIL.
resample_filter = getattr(Image, "LANCZOS", None)
if resample_filter is None:
    resample_filter = Image.ANTIALIAS

# gather all the files, type by type.
# For my page I just had jpg and png images
all_matching_files = glob.glob("*/*.jpg") + glob.glob("*/*.png")

# if you are using this for iWeb checkout the file count!
# my 80 page site had 3000+ images!!!
print("total images to be resized: " + str(len(all_matching_files)))
count = len(all_matching_files)

# loop through all the images and make changes
for infile in all_matching_files:
    scalesize = size
    im = Image.open(infile)

    # split out all the useful parts of the file's path
    thePath, theFile = os.path.split(infile)
    fileName, extension = os.path.splitext(theFile)

    # custom resize if necessary
    if theFile in csize1files:
        scalesize = csize1
    elif theFile in csize0files:
        scalesize = csize0

    # resize in place with PIL's thumbnail method
    # (it keeps the aspect ratio and never enlarges)
    im.thumbnail(scalesize, resample_filter)

    # save back as appropriate type; compare case-insensitively because
    # glob matches "X.PNG" on case-insensitive file systems
    if extension.lower() == ".png":
        im.save(infile, "PNG")
    else:
        im.save(infile, "JPEG")

    count -= 1
    if count % 10 == 0:
        # output some useful stats
        print(str(count) + " images remaining.")

print("....done")

## output:
##    total images to be resized: 3907

##    3900 images remaining.
##    3890 images remaining.
##    ... [snip].....(there were a lot!)
##    30 images remaining.
##    20 images remaining.
##    10 images remaining.
##    0 images remaining.
##    ....done

# Running this script reduced my website size
# from 96MB to 39MB!!
#
# Certainly there is still room for improvement
# future posts will ideally be aimed at further
# efficiency gains.

Saturday, October 3, 2009

Python - using sqlite3 module for persistent data

# The sqlite3 module lets you create and use
# a database with just a file

import sqlite3
# more detailed python doc sqlite3 

import os
# in this example we get the current working dir path 

# Choose the file to use for the
# db and connect (create it).
# os.path.join supplies the path separator; gluing the two strings
# together with + would put the db in the parent directory
# under a name like "<dirname>tempdb".
db_path = os.path.join(os.path.abspath('.'), "tempdb")
conn = sqlite3.connect(db_path)

# grab a cursor and we can create the db schema
c = conn.cursor()

# if you happen to run through this example a few times
# you may notice that the data is persistent.  For this example
# we'll ensure that we're starting from ground zero
# drop the table (if it exists)
c.execute('drop table if exists users')

# create a table
c.execute('create table users (name text, age text, email text)')

# insert data (one parameterized statement, run once per row)
c.executemany(
    'insert into users values (?, ?, ?)',
    [
        ('steve', '30', 'blah@blah.com'),
        ('steve2', '32', 'blah@blah2.com'),
        ('steve3', '33', 'blah@blah3.com'),
    ],
)

# the inserts only reach the file once the transaction is committed
conn.commit()

# now lets select our data
c.execute('select * from users')

# iterate through the results with for each
for row in c:
    print(row)

# done with the database file
conn.close()

# output:
#    (u'steve', u'30', u'blah@blah.com')
#    (u'steve2', u'32', u'blah@blah2.com')

#    (u'steve3', u'33', u'blah@blah3.com')

Thursday, October 1, 2009

Python - using glob to get lists of files and directories

import os.path
# glob is a simple and useful python module. 
# It uses shell-style wildcard patterns (not regular 
# expressions) to match directories and files for a 
# given path.  If you've ever used the command line to 
# 'ls' or 'dir' the current directory you may be aware 
# that those commands accept * or ? or [] to 
# match patterns.  glob is a python implementation 
# of this functionality. 
 
import glob
import os
 
# find all the .txt files in the current working directory
# (the pattern is lower case: matching is case-sensitive on
# case-sensitive file systems)
print(glob.glob('*.txt'))
# output:
# ['LICENSE.txt', 'NEWS.txt', 'README.txt']

# you can also specify a full path
# Here I'm searching for dll files in python 2.6
# (raw string, so the backslashes are never read as escapes)
print(glob.glob(r'C:\Python26\DLLs\*.dll'))
# output:
#    ['C:\\Python26\\DLLs\\sqlite3.dll',
#    'C:\\Python26\\DLLs\\tcl85.dll',
#    'C:\\Python26\\DLLs\\tclpip85.dll',
#    'C:\\Python26\\DLLs\\tk85.dll']

# If you are expecting a great deal of results
# you should use the glob.iglob method that returns
# matches as it goes and does not load everything
# into memory first.
# glob.iglob() example
f = glob.iglob(r'C:\Python26\Lib\*')

# a for loop pulls the matches out of the iterator one at a time
# and stops by itself when there are no more
for fileNameAndPath in f:
    # since glob gives you the full path you can
    # use the output with some of the os module's methods
    if os.path.isfile(fileNameAndPath):
        fileNameAndPath += " is a file."
    else:
        fileNameAndPath += " is not a file."
    print(fileNameAndPath)
 
#output (snipped a bit...since there a lot): 
#    C:\Python26\Lib\abc.py is a file. 
#    ....[snip] 
#    C:\Python26\Lib\compiler is not a file. 
#    ...[another snip] 
#    C:\Python26\Lib\getopt.py is a file. 
#    C:\Python26\Lib\getopt.pyc is a file. 
#    C:\Python26\Lib\getpass.py is a file. 
#    C:\Python26\Lib\gettext.py is a file. 
#    C:\Python26\Lib\glob.py is a file. 
#    C:\Python26\Lib\glob.pyc is a file.

Wednesday, September 23, 2009

Python - pure python ping using raw sockets

# I've searched the web far and wide.  I've written my own

#    os.popen() version to ride an operating system's ping.

#    I finally found a pure raw ping implementation in python

#    that seems to work!

# I copied the entire page from:

#   http://svn.pylucid.net/pylucid/CodeSnippets/ping.py

#########################################################

#!/usr/bin/env python

 
"""
    A pure python ping implementation using raw socket.
 
 
    Note that ICMP messages can only be sent from processes running as root.
 
 
    Derived from ping.c distributed in Linux's netkit. That code is
    copyright (c) 1989 by The Regents of the University of California.
    That code is in turn derived from code written by Mike Muuss of the
    US Army Ballistic Research Laboratory in December, 1983 and
    placed in the public domain. They have my thanks.
 
    Bugs are naturally mine. I'd be glad to hear about them. There are
    certainly word - size dependenceies here.
 
    Copyright (c) Matthew Dixon Cowles, <http://www.visi.com/~mdc/>.
    Distributable under the terms of the GNU General Public License
    version 2. Provided with no warranties of any sort.
 
    Original Version from Matthew Dixon Cowles:
      -> ftp://ftp.visi.com/users/mdc/ping.py
 
    Rewrite by Jens Diemer:
      -> http://www.python-forum.de/post-69122.html#69122
 
 
    Revision history
    ~~~~~~~~~~~~~~~~
 
    May 30, 2007
    little rewrite by Jens Diemer:
     -  change socket asterisk import to a normal import
     -  replace time.time() with time.clock()
     -  delete "return None" (or change to "return" only)
     -  in checksum() rename "str" to "source_string"
 
    November 22, 1997
    Initial hack. Doesn't do much, but rather than try to guess
    what features I (or others) will want in the future, I've only
    put in what I need now.
 
    December 16, 1997
    For some reason, the checksum bytes are in the wrong order when
    this is run under Solaris 2.X for SPARC but it works right under
    Linux x86. Since I don't know just what's wrong, I'll swap the
    bytes always and then do an htons().
 
    December 4, 2000
    Changed the struct.pack() calls to pack the checksum and ID as
    unsigned. My thanks to Jerome Poincheval for the fix.
 
 
    Last commit info:
    ~~~~~~~~~~~~~~~~~
    $LastChangedDate: $
    $Rev: $
    $Author: $
""" 
 
 
import os, sys, socket, struct, select, time 
 
# ICMP message type placed in the header of every echo request sent below.
# From /usr/include/linux/icmp.h; your mileage may vary. 
ICMP_ECHO_REQUEST = 8 # Seems to be the same on Solaris. 
 
 
def checksum(source_string):
    """
    Compute the 16 bit checksum that goes into the ICMP header.

    source_string -- the packet contents, as bytes / bytearray or as a
                     text string whose characters are all below 256

    Returns an int in the range 0..0xffff.  The two bytes of the result
    are swapped before returning, exactly as the original code did.

    The original author was not fully confident in the algorithm but
    reported that testing gave the same answers as in_cksum in ping.c.
    """
    # work on integer byte values so bytes and text input behave alike
    # on both python 2 and python 3
    if not isinstance(source_string, (bytes, bytearray)):
        source_string = source_string.encode("latin-1")
    data = bytearray(source_string)

    # add up the 16 bit words (second byte of each pair is the high byte);
    # floor division keeps the index arithmetic in integers
    even_length = (len(data) // 2) * 2
    total = 0
    for pos in range(0, even_length, 2):
        total += data[pos + 1] * 256 + data[pos]

    # an odd trailing byte is added on its own
    if even_length < len(data):
        total += data[-1]

    # keep 32 bits, then fold the upper half into the lower 16 bits
    total &= 0xffffffff
    total = (total >> 16) + (total & 0xffff)
    total += total >> 16
    answer = ~total & 0xffff

    # Swap bytes (the original author did not know why this is needed).
    return answer >> 8 | (answer << 8 & 0xff00)
 
 
def receive_one_ping(my_socket, ID, timeout):
    """
    Wait for the reply that answers our ping.

    my_socket -- the socket the request was sent on
    ID        -- the 16 bit identifier that was put into the request
    timeout   -- how many seconds to wait

    Returns the delay in seconds (receive time minus the time stamp
    carried in the packet), or None when no packet with a matching ID
    arrived before the timeout ran out.

    The time stamp is written by send_one_ping() with time.time(), so
    both functions must keep using the same clock.
    """
    timeLeft = timeout
    while True:
        startedSelect = time.time()
        whatReady = select.select([my_socket], [], [], timeLeft)
        howLongInSelect = time.time() - startedSelect
        if not whatReady[0]:  # Timeout
            return None

        timeReceived = time.time()
        recPacket, addr = my_socket.recvfrom(1024)

        # the 8 header bytes are read at offset 20
        # (assumes a 20 byte IP header in front -- TODO confirm for
        # packets that carry IP options)
        bytesInDouble = struct.calcsize("d")
        if len(recPacket) >= 28 + bytesInDouble:
            icmpType, code, packetChecksum, packetID, sequence = struct.unpack(
                "bbHHh", recPacket[20:28]
            )
            if packetID == ID:
                timeSent = struct.unpack(
                    "d", recPacket[28:28 + bytesInDouble]
                )[0]
                return timeReceived - timeSent

        # not our packet (or too short to be one): keep waiting for
        # whatever is left of the timeout
        timeLeft = timeLeft - howLongInSelect
        if timeLeft <= 0:
            return None


def send_one_ping(my_socket, dest_addr, ID):
    """
    Send one ping to the given >dest_addr<.

    my_socket -- socket to send on
    dest_addr -- host name or ip address; resolved with gethostbyname()
    ID        -- 16 bit identifier stored in the header so that
                 receive_one_ping() can recognise the answer
    """
    dest_addr = socket.gethostbyname(dest_addr)

    # Header is type (8), code (8), checksum (16), id (16), sequence (16)
    my_checksum = 0

    # Make a dummy header with a 0 checksum.
    header = struct.pack("bbHHh", ICMP_ECHO_REQUEST, 0, my_checksum, ID, 1)

    # payload: the send time stamp followed by filler up to 192 bytes.
    # The filler is a bytes literal so it can be joined to the packed
    # time stamp on python 3 as well as python 2.
    bytesInDouble = struct.calcsize("d")
    data = (192 - bytesInDouble) * b"Q"
    data = struct.pack("d", time.time()) + data

    # Calculate the checksum on the data and the dummy header.
    my_checksum = checksum(header + data)

    # Now that we have the right checksum, we put that in. It's just easier
    # to make up a new header than to stuff it into the dummy.
    header = struct.pack(
        "bbHHh", ICMP_ECHO_REQUEST, 0, socket.htons(my_checksum), ID, 1
    )
    packet = header + data
    # the original author did not know what the port number 1 is for
    my_socket.sendto(packet, (dest_addr, 1))
 
 
def do_one(dest_addr, timeout):
    """
    Send one ping to >dest_addr< and wait for the answer.

    Returns either the delay (in seconds) or None on timeout.

    Raises socket.error when the raw socket cannot be created; for
    error number 1 (operation not permitted) the message is extended
    with a hint about needing root.
    """
    icmp = socket.getprotobyname("icmp")
    try:
        my_socket = socket.socket(socket.AF_INET, socket.SOCK_RAW, icmp)
    except socket.error as err:
        if len(err.args) >= 2 and err.args[0] == 1:
            # Operation not permitted
            raise socket.error(
                str(err.args[1])
                + (
                    " - Note that ICMP messages can only be sent from processes"
                    " running as root."
                )
            )
        raise # raise the original error

    my_ID = os.getpid() & 0xFFFF

    # close the socket even when sending or receiving raises
    try:
        send_one_ping(my_socket, dest_addr, my_ID)
        delay = receive_one_ping(my_socket, my_ID, timeout)
    finally:
        my_socket.close()
    return delay
 
 
def verbose_ping(dest_addr, timeout = 2, count = 4):
    """
    Send >count< ping to >dest_addr< with the given >timeout< and display
    the result.

    One line is printed per ping.  A name resolution failure
    (socket.gaierror) is reported once and ends the series early.
    """
    for i in range(count):
        # written without a newline so the result lands on the same line;
        # sys.stdout.write works the same on python 2 and python 3
        sys.stdout.write("ping %s... " % dest_addr)
        sys.stdout.flush()
        try:
            delay = do_one(dest_addr, timeout)
        except socket.gaierror as e:
            print("failed. (socket error: '%s')" % e.args[1])
            break

        if delay is None:
            print("failed. (timeout within %ssec.)" % timeout)
        else:
            # seconds -> milliseconds
            print("get ping in %0.4fms" % (delay * 1000))
    # blank line between hosts
    print("")
 
 
if __name__ == '__main__':
    # demo targets: two public hosts, a name that is not expected to
    # resolve, and a private-network address
    for target in (
        "heise.de",
        "google.com",
        "a-test-url-taht-is-not-available.com",
        "192.168.1.1",
    ):
        verbose_ping(target)
 
 
#output (for me): 
##        ping heise.de... get ping in 161.5423ms 
##        ping heise.de... get ping in 161.8938ms 
##        ping heise.de... get ping in 161.8139ms 
##        ping heise.de... get ping in 161.0677ms 
## 
##        ping google.com... get ping in 55.2157ms 
##        ping google.com... get ping in 54.8570ms 
##        ping google.com... get ping in 54.9019ms 
##        ping google.com... get ping in 54.7282ms 
## 
##        ping a-test-url-taht-is-not-available.com... failed. (socket error: 'getaddrinfo failed') 
## 
##        ping 192.168.1.1... get ping in 2.6651ms 
##        ping 192.168.1.1... get ping in 3.4502ms 
##        ping 192.168.1.1... get ping in 2.0416ms 
##        ping 192.168.1.1... get ping in 1.9452ms

python - copy images (or any file) from the web to local machine

# copy images off of the web to your local machine
#  (this should work for any files off the web...not just images)

import urllib
import os

# urlretrieve lives in urllib.request on python 3
# and directly in urllib on python 2
try:
    from urllib.request import urlretrieve
except ImportError:
    from urllib import urlretrieve

url_of_file = "http://www.example.com/images/example_image.jpg"
local_file = "local_copy.jpg"

# retrieve from web and put in local_file
urlretrieve(url_of_file, local_file)

# now prove that the file was copied
# just listing all files in working directory
print(os.listdir('.'))

Tuesday, September 22, 2009

python - os module and working directory

import os 
 
# curdir attribute lists the current directory
# which is always '.' .... which means...right here
# (print(...) with one argument prints the same on python 2 and 3)
print(os.curdir)

#output:
# .

# to see the path to the curdir
print(os.path.abspath(os.path.curdir))

#output
# C:\Documents and Settings\steve\My Documents\python

# and then to see what files are in the curdir
print(os.listdir(os.curdir))
#or
print(os.listdir(os.path.abspath(os.path.curdir)))
 
# output for either listdir 
#['colormaker.py', 'createThumbnail.py', 'strFunctions.py', 'pycolor.py']

Wednesday, September 16, 2009

Python - determine an image's type (regardless of extension)

# iterate through all the files in the current directory
# identify the type of image file (regardless of extension)


import os
import imghdr


# list files in the current working directory
# NOTE: imghdr is deprecated since python 3.11 and removed in 3.13
for f in os.listdir('.'):
    # only regular files are inspected; reporting a directory here
    # would reuse the type of the previous file (or fail with a
    # NameError when a directory comes first)
    if not os.path.isfile(f):
        continue

    imgtype = imghdr.what(f)
    if imgtype is None:
        imgtype = "not image"

    print("'" + f + "'" + " is type " + imgtype)



#output: (for me)

#'2007 Oct 31 146.jpg' is type jpeg

#
#'2007 Oct 31 147.jpg' is type jpeg
#
#'2007 Oct 31 148.jpg' is type jpeg
#
#'2007 Oct 31 149.image' is type jpeg

#
#'2007 Oct 31 150.jpg' is type jpeg
#
#'big kitty.GI' is type gif
#
#'eric.image' is type jpeg

#
#'LICENSE.txt' is type not image
#
#'NEWS.txt' is type not image
#

Thursday, July 16, 2009

Python - ping one or many addresses at once

# ping a range or a single ip address 
# This implementation creates a thread for each ip address 
# you wish to ping. 
# 
# this version utilizes the os' ping and 
# drives the ping through os.popen 
# 
# if you are looking for a pure python version you should try this 
# 
# if you are looking for a better way to drive 
# subprocesses then look here 

import os, time, sys
from threading import Thread
 
def usage():
    """
    Print the command line help for ping_o_matic.py to stdout.
    """
    # print(...) with a single string prints the same on python 2 and 3
    print(""" 
    usage: ping_o_matic.py ip_or_ip_range 
 
    This script pings an ip address or a range of ip addresses and 
    returns the result of the ping. If a '*' character is not found 
    in the ipaddress then the script assumes that you are pinging a 
    single ip by ip or dns. 
    If a range of ip addresses is passed then only a class c network 
    can be pinged, nothing larger. 
    example: 
    ping_o_matic.py 192.168.1.1 # pings the ip address 192.168.1.1 
    ping_o_matic.py 192.168.1.* # pings entire class c subnet of 192.168.1.1-255 
    """)
 
class pingy(Thread): # class inherits from thread
    """
    Thread that runs the operating system's ping against one address
    and records whether a reply was seen.

    ip          -- the address (or name) handed to ping
    status      -- index into `information`
    information -- the human readable texts for each status
    """

    def __init__(self, ip):
        Thread.__init__(self) # calls super init
        self.information = ("yet to run", "no response", "Active")
        self.ip = ip
        self.status = 0 # corresponds to the information tuple -- 0 = "yet to run"

    @staticmethod
    def _is_reply(line):
        """
        Return True when one line of ping output reports an answer.

        "Reply" is the marker the original script looked for; "bytes from"
        is presumably what non-Windows ping prints -- verify on the target
        platform.  The address itself is deliberately not matched: the
        original test `line.find(self.ip) and ...` treated find()'s -1
        ("not found") as true, so it never really required it.
        """
        return "Reply" in line or "bytes from" in line

    def run(self):
        """
        Ping self.ip twice and set self.status to 2 ("Active") when a
        reply line shows up, otherwise leave it at 1 ("no response").
        """
        # local import: subprocess is not among the file's imports
        import subprocess

        # the option that sets the number of pings is -n on Windows
        # and -c elsewhere
        count_flag = "-n" if sys.platform.startswith("win") else "-c"

        self.status = 1 #running but no response yet
        try:
            # argument list instead of a shell string, so the address
            # taken from the command line is never interpreted by a shell
            pingexe = subprocess.Popen(
                ["ping", count_flag, "2", self.ip],
                stdout=subprocess.PIPE,
                universal_newlines=True,
            )
        except OSError:
            # ping could not be started: stays "no response"
            return
        try:
            for line in pingexe.stdout:
                if self._is_reply(line):
                    self.status = 2 # 2=Active
        finally:
            pingexe.stdout.close()
            pingexe.wait()

    def getStatus(self):
        """Return the text that belongs to the current status."""
        return self.information[self.status]
 
if __name__ == '__main__':
    # only accept one parameter -- the rest are ignored
    try:
        ipaddr = sys.argv[1]
    except IndexError:
        # no address given on the command line
        usage()
        sys.exit()

    # work out which addresses to ping
    wildcard = ipaddr.find('*')
    if wildcard == -1:
        targets = [ipaddr]
    else: # then its a range of IPs
        # everything in front of the '*' is the prefix; the last part
        # runs from 0 to 254
        prefix = ipaddr[0:wildcard]
        targets = [prefix + str(host) for host in range(0, 255)]

    # one thread per address, all started before any result is collected
    pinglist = []
    for ip in targets:
        current = pingy(ip)
        pinglist.append(current)
        current.start()

    for pig in pinglist: # loop through all threads and collect and display status
        pig.join()
        # a single formatted string prints the same on python 2 and 3
        print("%s ,  %s" % (pig.ip, pig.getStatus()))
 
#output (for me): 
#python pingmanyatonce.py 10.0.0.1 
#10.0.0.1 ,  Active 
# 
# 
#python pingmanyatonce.py 10.0.0.* 
#10.0.0.0 ,  no response 
#10.0.0.1 ,  Active 
#10.0.0.2 ,  Active 
#10.0.0.3 ,  no response 
#10.0.0.4 ,  no response 
#10.0.0.5 ,  no response 
#10.0.0.6 ,  no response 
#10.0.0.7 ,  no response 
#... # and it keeps going for the entire subnet...

# try out the full source