Tags: python, python-3.x, web-scraping, beautifulsoup

Trying to Loop Through Array of Tickers and Download All Tables from Web


I think this code snippet is close to working, but it doesn't download data from the website that it's pointed to. I am trying to download the table named 'fs-table' and I want to put each 'fs-table' on individual Excel worksheets.

# pip install -U multi-mechanize
# NOTE(review): `multi-mechanize` is a load-testing tool and does NOT provide
# the `mechanize` module — hence the ModuleNotFoundError below. `mechanize`
# also did not support Python 3 at the time; `requests` + `bs4` is the usual
# replacement on 3.x.
import mechanize
mech = mechanize.Browser()
from mechanize import Browser
# NOTE(review): `from BeautifulSoup import BeautifulSoup` is the BeautifulSoup 3
# import; on Python 3 the package is `bs4` (`from bs4 import BeautifulSoup`).
from BeautifulSoup import BeautifulSoup
from openpyxl import load_workbook
from openpyxl import Workbook

mech = Browser()  # rebinds `mech`; the Browser created above is discarded

tckr = ['SBUX','MSFT','AAPL']
# NOTE(review): `tckr` is a list, so `str + list` raises TypeError on this line.
# The ticker needs to be interpolated per-URL inside the loop instead.
url = "https://finance.google.com/finance?q=NASDAQ:" + tckr + "&fstype=ii"
page = mech.open(url)
html = page.read()
soup = BeautifulSoup(html)
# NOTE(review): find("fs-table", ...) searches for a TAG named <fs-table>;
# an element with id="fs-table" needs soup.find(id="fs-table") (or BS4's
# select_one("#fs-table")).
table = soup.find("fs-table", border=1)

# NOTE(review): this appends each ticker to a URL that already ends in
# "&fstype=ii", producing malformed URLs — and the page/table above were
# fetched only once, so the loop below reuses that single `table` for
# every URL rather than re-fetching per ticker.
url_list = [url + s for s in tckr]

for url in url_list:
    try:
        wb1 = Workbook()
        ws1 = wb1.active  # both discarded when wb1 is rebound on the next line
        wb1 = load_workbook('C:/Users/Excel/Desktop/template.xlsx')
        wb1.create_sheet(tckr)  # NOTE(review): sheet title should be a str, not the whole list
        # NOTE(review): `f` is opened for append but never written to —
        # the records are only printed, never saved to the CSV.
        with open('C:/Users/Excel/Desktop/today.csv', 'a', newline='') as f:   
            for row in table.findAll('tr')[1:]:
                col = row.findAll('td')
                rank = col[0].string
                artist = col[1].string
                album = col[2].string
                cover_link = col[3].img['src']
                record = (rank, artist, album, cover_link)
                print("|".join(record))

    # NOTE(review): `HTTPError` is never imported here, so this except clause
    # would itself raise NameError if an HTTP error actually occurred
    # (needs e.g. `from urllib.error import HTTPError`).
    except HTTPError:
        print("{} - not found".format(url))
    wb1.save('C:/Users/Excel/Desktop/template.xlsx') 

Here is the website I am trying to work with.

[screenshot: the target page's financial-statements table — the image did not survive extraction]

Now, I am getting this message: ModuleNotFoundError: No module named 'mechanize'

But, I already installed multi-mechanize!

[screenshot: the pip output showing multi-mechanize installed — the image did not survive extraction]

I am using Python 3.6.1; Spyder 3.2.4


Solution

  • Try this. It will fetch you the tabular data from that site.

    from bs4 import BeautifulSoup
    import requests
    
    # Google Finance income-statement page; {} is replaced with the ticker symbol.
    URL = "https://finance.google.com/finance?q=NASDAQ:{}&fstype=ii"
    
    def Get_Table(ticker):
        """Fetch and print the '#fs-table' financials table for *ticker*.
    
        Prints each table row as a list of whitespace-normalized cell
        strings, and also returns all rows (a list of lists) so callers
        can reuse the data — e.g. to write one Excel worksheet per ticker.
        Returns an empty list when the page has no '#fs-table'.
    
        Raises requests.HTTPError on a 4xx/5xx response.
        """
        response = requests.get(URL.format(ticker), timeout=10)
        # Fail loudly on an error response instead of parsing an error page.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "lxml")
        table = soup.select_one("#fs-table")
        if table is None:  # layout changed or ticker not found — avoid AttributeError
            print("{} - no #fs-table found".format(ticker))
            return []
        rows = []
        for items in table.select("tr"):
            # Collapse runs of whitespace inside each <th>/<td> cell.
            data = [' '.join(item.text.split()) for item in items.select("th,td")]
            print(data)
            rows.append(data)
        return rows
    
    if __name__ == '__main__':
        for tckr in ['SBUX','MSFT','AAPL']:
            Get_Table(tckr)