When I run my code and enter 18351 as the start value and 78351 as the end value for the range of company IDs, the output is not what I want:
import requests
from bs4 import BeautifulSoup
# Define a function to extract the contact person's name and fax number from the HTML table
def _field_after(text, label):
    """Return the value following *label* in *text*, cut at the first comma or line break."""
    value = text.partition(label)[2].lstrip()
    for terminator in (',', '\n'):
        value = value.split(terminator, 1)[0]
    return value.strip()


def get_contact_info(soup):
    """Extract the contact person's name and fax number from a parsed page.

    Every ``td.bla8`` cell of *soup* is inspected. When a cell's text contains
    the ``Contact:`` or ``Fax:`` label, the value after the label is taken up
    to the first comma or line break. If several cells carry the same label,
    the last one wins.

    Args:
        soup: Parsed document exposing ``select()``; its cells must expose
            ``get_text()`` (e.g. a ``BeautifulSoup`` object).

    Returns:
        A ``(contact_person, fax_number)`` tuple of strings. A field is ``''``
        when its label was not found.
    """
    contact_person = ''
    fax_number = ''
    for td in soup.select('td.bla8'):
        # Join the cell's text nodes with a newline: plain ``td.text`` glues
        # values that are separated only by markup (such as <br>) together,
        # so the next field would leak into this one.
        text = td.get_text(separator='\n')
        if 'Contact:' in text:
            contact_person = _field_after(text, 'Contact:')
        if 'Fax:' in text:
            fax_number = _field_after(text, 'Fax:')
    return contact_person, fax_number
# Define a function to scrape the supplier details page for a given company ID
def scrape_supplier_details(company_id, timeout=10):
    """Fetch the supplier details page for *company_id* and extract its contact info.

    Args:
        company_id: Value substituted into the ``company_id`` query parameter
            of the supplier details URL.
        timeout: Seconds to wait for the server before the request is
            abandoned. Passed straight to ``requests.get``.

    Returns:
        The ``(contact_person, fax_number)`` tuple produced by
        ``get_contact_info`` for the downloaded page.

    Raises:
        requests.RequestException: Propagated from ``requests.get`` when the
            request fails or times out.
    """
    url = f'https://ha.internationaleprocurement.com/search/supplier_details.html?company_id={company_id}'
    # Without an explicit timeout, requests waits indefinitely on a server
    # that stops responding, which would stall a long scraping run.
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.content, 'html.parser')
    return get_contact_info(soup)
# Prompt the user to enter the start and end values for the range of company IDs to scrape
start_id = int(input("Enter the start value for the range of company IDs: "))
end_id = int(input("Enter the end value for the range of company IDs: "))
# Scrape the supplier details page for each company ID in the specified range
# (end_id is inclusive, hence the + 1).
for company_id in range(start_id, end_id + 1):
    try:
        contact_person, fax_number = scrape_supplier_details(company_id)
    except requests.RequestException as exc:
        # A single failed request must not abort the rest of a long run;
        # report it and move on to the next ID.
        print(f"Company ID: {company_id}, request failed: {exc}")
        continue
    print(f"Company ID: {company_id}, Contact: {contact_person}, Fax: {fax_number}")
In this example I use the `re` module to extract the contact person's name and fax number (if any):
import re
import requests
from bs4 import BeautifulSoup
# Define a function to extract the contact person's name and fax number from the HTML table
def get_contact_info(soup):
    """Extract the contact person's name and fax number from a parsed page.

    Looks for the first ``<td>`` that contains no nested ``<td>`` and whose
    text contains ``Contact:``, then reads the ``Contact: ...`` and
    ``Fax: ...`` lines from that cell's text.

    Args:
        soup: Parsed document exposing ``select_one()`` (e.g. a
            ``BeautifulSoup`` object).

    Returns:
        A ``(contact, fax)`` tuple of strings. A field is ``''`` when the
        cell is missing or the corresponding line is not present in it.
    """
    # ``:not(:has(td))`` restricts the match to the innermost cell, so an
    # outer layout cell that merely wraps the real one is not picked.
    td = soup.select_one('td:not(:has(td)):-soup-contains("Contact:")')
    if td is None:
        return '', ''

    # One stripped text node per line, so each regex can stop at '\n'.
    text = td.get_text(strip=True, separator='\n')
    contact = re.search(r'Contact: ([^\n]+)', text)
    fax = re.search(r'Fax: ([^\n]+)', text)
    return contact.group(1) if contact else '', fax.group(1) if fax else ''
# Define a function to scrape the supplier details page for a given company ID
def scrape_supplier_details(company_id, timeout=10):
    """Download the supplier details page for *company_id* and pull out its contact info.

    Args:
        company_id: Value substituted into the ``company_id`` query parameter
            of the supplier details URL.
        timeout: Seconds to wait for the server before the request is
            abandoned. Passed straight to ``requests.get``.

    Returns:
        The ``(contact_person, fax_number)`` tuple produced by
        ``get_contact_info`` for the downloaded page.

    Raises:
        requests.RequestException: Propagated from ``requests.get`` when the
            request fails or times out.
    """
    url = f'https://ha.internationaleprocurement.com/search/supplier_details.html?company_id={company_id}'
    # An explicit timeout keeps one unresponsive request from blocking the
    # whole run; requests has no default timeout.
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.content, 'html.parser')
    return get_contact_info(soup)
start_id = 18351
end_id = 78351
# Scrape the supplier details page for each company ID in the specified range
# (end_id is inclusive, hence the + 1).
for company_id in range(start_id, end_id + 1):
    try:
        contact_person, fax_number = scrape_supplier_details(company_id)
    except requests.RequestException as exc:
        # Tens of thousands of requests are made here; a single network
        # failure should be reported and skipped, not end the run.
        print(f"Company ID: {company_id}, request failed: {exc}")
        continue
    print(f"Company ID: {company_id}, Contact: {contact_person}, Fax: {fax_number}")
Prints:
Company ID: 18351, Contact: , Fax:
Company ID: 18352, Contact: , Fax:
Company ID: 18353, Contact: Jim Devlin, Fax: 609-252-8015
Company ID: 18354, Contact: Scott Grumski, Fax: 724-224-6050
Company ID: 18355, Contact: , Fax:
Company ID: 18356, Contact: Anthony Corum, Fax: 302-337-0998
Company ID: 18357, Contact: Bill Kenyon, Fax: 814-432-5678
Company ID: 18358, Contact: Sal Austin, Fax: 201-433-4334
Company ID: 18359, Contact: Raymond Petrarca, Fax: 401-921-5520
...and so on.