I am using Python 2.7.6 to submit queries to an .aspx web page, parse the results with BeautifulSoup, and store them in an Excel spreadsheet.
import mechanize
import re
import xlwt
from bs4 import BeautifulSoup
import urllib2

# Workbook that will collect the scraped legislation numbers.
book = xlwt.Workbook(encoding='utf-8', style_compression=0)
sheet = book.add_sheet('Legi', cell_overwrite_ok=True)

for items in ['university student', 'high school student']:
    # Fetch the search page and grab its first (search) form.
    url = r'http://legistar.council.nyc.gov/Legislation.aspx'
    response = mechanize.urlopen(mechanize.Request(url))
    form = mechanize.ParseResponse(response, backwards_compat=False)[0]
    response.close()

    # Fill in the search box and submit the form.
    form['ctl00$ContentPlaceHolder1$txtSearch'] = items
    submit_page = mechanize.urlopen(form.click())
    soup = BeautifulSoup(submit_page.read())

    # Every result row links to a LegislationDetail page.
    aa = soup.find_all(href=re.compile('LegislationDetail'))
    for bb in aa:
        cc = bb.text
        # NOTE: `results` is re-created on every iteration of this loop,
        # so it only ever holds the most recent link text — this is the
        # bug being reported in the question.
        results = []
        results.append(cc)
    for row, legi_no in enumerate(results):
        sheet.write(row, 0, legi_no)
book.save("C:\\legi results.xls")
It finds and picks up the results if I print the variable ‘cc’; however, writing to the Excel spreadsheet is not successful, because only the first cell gets written.
You create the `results` variable inside the `for bb in aa` loop. This means `results` gets re-initialized to `[]` for each value in `aa`, so in the end `results` contains only one element (the last one), which is of course not intended. Move the initialization of `results` outside the loop and it should work fine, as shown below.
import mechanize
import re
import xlwt
from bs4 import BeautifulSoup
import urllib2

# Spreadsheet that collects every legislation number found.
book = xlwt.Workbook(encoding='utf-8', style_compression=0)
sheet = book.add_sheet('Legi', cell_overwrite_ok=True)

# Initialize `results` ONCE, before all loops.  Creating it inside the
# inner loop (the original bug) resets it on every link; creating it per
# search term is also wrong, because `enumerate` restarts the row counter
# at 0 and the second term's rows would overwrite the first term's rows.
results = []
for items in ['university student', 'high school student']:
    # Fetch the search page and grab its first (search) form.
    url = r'http://legistar.council.nyc.gov/Legislation.aspx'
    request = mechanize.Request(url)
    response = mechanize.urlopen(request)
    forms = mechanize.ParseResponse(response, backwards_compat=False)
    form = forms[0]
    response.close()

    # Fill in the search box and submit the form.
    form['ctl00$ContentPlaceHolder1$txtSearch'] = items
    submit_page = mechanize.urlopen(form.click())
    soup = BeautifulSoup(submit_page.read())

    # Every search hit links to a LegislationDetail page; keep the link text.
    aa = soup.find_all(href=re.compile('LegislationDetail'))
    for bb in aa:
        results.append(bb.text)

# Write all collected values in one pass, then save the workbook once.
for row, legi_no in enumerate(results):
    sheet.write(row, 0, legi_no)
book.save("C:\\legi results.xls")