# Sometimes websites deny access to bots or unsupported browsers.
# Python allows you to change the User-Agent string so that you can
# impersonate any browser you desire.
import urllib
import urllib2
# Demonstration script: fetch the same URL twice, first with urllib2's
# default headers and then with a browser-like User-Agent header, so the
# two entries can be compared in the server's access log.

# Both requests target the same URL, so define it once.
url = 'http://www.example.com'

# Request 1: unaltered headers (urllib2 supplies its own User-Agent).
# A parenthesised single-string print emits the same text under the
# Python 2 print statement as the unparenthesised form.
print("sending a request using the default python user agent string...")
req = urllib2.Request(url)
response = urllib2.urlopen(req)
try:
    the_page = response.read()
finally:
    # Release the connection even if read() raises.
    response.close()

# Request 2: alter the header to look like a real browser.
print("sending a request with an altered user agent string...")
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
headers = {'User-Agent': user_agent}
# data=None: no request body is attached; only the headers are customised.
req = urllib2.Request(url, data=None, headers=headers)
response = urllib2.urlopen(req)
try:
    the_page = response.read()
finally:
    # Release the connection even if read() raises.
    response.close()
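# Apache access-log entries recorded on the server:
# a request made without altering the user agent
# "POST / HTTP/1.1" 200 2022 "-" "Python-urllib/2.6"
#
# a request made with the altered user agent
# "POST / HTTP/1.1" 200 2022 "-" "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)"
#
# NOTE(review): both entries show POST, but the requests above attach no data,
# in which case urllib2 issues GET -- presumably the log was captured from a
# variant of this script that sent form data; confirm before relying on it.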
# A Python example-based blog that shows how to accomplish Python goals and how to correct Python errors.
# Monday, September 21, 2009
# Python - alter the user agent string in a web request
# Subscribe to:
# Post Comments (Atom)
# No comments:
# Post a Comment