# Sometimes websites deny access to bots or unsupported browsers.
# Python allows you to change the user agent string so that you can
# impersonate any browser you desire.
import urllib
import urllib2
# Demonstration: fetch the same URL twice, first with urllib2's default
# headers and then with a browser-like User-Agent header.
#
# Step 1: unaltered header.
# (print with a single parenthesised string prints the same text under the
# Python 2 print statement.)
print("sending a request using the default python user agent string...")
url = 'http://www.example.com'
req = urllib2.Request(url)
response = urllib2.urlopen(req)
try:
    the_page = response.read()
finally:
    # Always release the underlying connection, even if read() raises.
    response.close()

# Step 2: alter the header to look like a real browser.
print("sending a request with an altered user agent string...")
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
headers = {'User-Agent': user_agent}
# data=None means no request body is attached; only the headers differ
# from the first request.
req = urllib2.Request(url, data=None, headers=headers)
response = urllib2.urlopen(req)
try:
    the_page = response.read()
finally:
    response.close()

# Apache logs on the server indicate:
# a request made without altering the user_agent
# "POST / HTTP/1.1" 200 2022 "-" "Python-urllib/2.6"
#
# a request made with the altered user_agent
# "POST / HTTP/1.1" 200 2022 "-" "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)"
#
# NOTE(review): the log entries above show POST, but the requests in this
# script carry no data, and urllib2 sends a GET when data is None.
# Presumably the logs were captured from a variant that passed form data;
# confirm before relying on the method shown.
# A Python example-based blog that shows how to accomplish Python goals and how to correct Python errors.
# Showing posts with label header. Show all posts
# Showing posts with label header. Show all posts
# Monday, September 21, 2009
# Python - alter user agent string in web request
# Subscribe to:
# Posts (Atom)