"""Download one month of the BTS airline-employment CSV and tidy it.

The script POSTs a month/year pair to the BTS endpoint, stores the raw
response on disk, then writes a cleaned copy in which every kept row is
prefixed with the current carrier group, the month and the year.  Finally
the cleaned file is echoed to stdout.
"""
import urllib
import urllib2
import csv

url = 'http://www.bts.gov/airline_employment/src/datadisp_csv.xml?o=NAME+ASC'

# Reporting period: used both for the request and as a prefix on each row.
month = "09"
year = "2008"

# The download is written to and read back from the same file.  The original
# wrote to a relative 'test2.csv' but read the absolute path below, which
# only agreed when the script was launched from this directory.
RAW_PATH = '/home/abhi/Dropbox/hobby/govdata/test2.csv'
CLEAN_PATH = '/home/abhi/Dropbox/hobby/govdata/clean.csv'

values = {'month': month, 'year': year}
data = urllib.urlencode(values)
req = urllib2.Request(url, data)  # passing data makes this a POST
response = urllib2.urlopen(req)
try:
    the_page = response.read()
finally:
    response.close()

# 'wb' creates the file if missing and truncates stale content; the with
# block guarantees the bytes are flushed before the file is re-read below.
with open(RAW_PATH, 'wb') as f:
    f.write(the_page)

# Rows seen before any group heading are attributed to this group.
group = "Major"

with open(RAW_PATH, "rb") as raw:
    with open(CLEAN_PATH, "w") as clean:
        # write header
        clean.write('Group,Month,Year,Carrier Name,Full Time,Part Time,Total,Blank\n')
        for line in raw:
            # Lines are classified purely by the position of their first
            # comma.  NOTE(review): presumably index 2 marks a data row and
            # 10/16/17 mark group heading lines -- confirm against a sample
            # of the raw download.
            comma_at = line.find(',')
            if comma_at == 2:
                strline = line.replace(' ,,,', '')
                # Skip TOTAL rows; keep everything else.
                if 'TOTAL' not in strline:
                    clean.write(group + ',' + month + ',' + year + ',' + strline)
            elif comma_at == 10:
                group = "National"
            elif comma_at == 16:
                group = "Large Regional"
            # Must be a logical 'and': the previous bitwise '&' bound tighter
            # than the comparisons and matched lines containing no 'G' at all
            # (find() returns -1 and 17 & -1 == 17).
            elif comma_at == 17 and line.find('G') > 1:
                group = "Medium Regional"
            else:
                # Unrecognised lines still emit one blank line on stdout.
                print("")

# The cleaned file is closed (and therefore flushed) before being echoed.
with open(CLEAN_PATH, "rb") as cleanread:
    for line in cleanread:
        print(line)