python web-scraping beautifulsoup python-requests python-requests-html

Web scraping behind a login (authentication) in Python


from bs4 import BeautifulSoup
import requests

# Script: try to log in to GitHub with a plain form POST, then scrape the
# pinned repositories shown on the user's profile page.

# One Session object is used for every request below.
session = requests.Session()

# Form fields sent with the login POST.
# NOTE(review): the field names '_username' / '_password' are assumed here;
# confirm them against the <input name="..."> attributes of the real form.
payload = {'_username': '[USERNAME]', '_password': '[PASSWORD]'}

# Post the payload to the login URL.
# NOTE(review): the response is overwritten on the next statement without
# being inspected, so a rejected login goes unnoticed.
s = session.post("https://github.com/login", data=payload)

# Fetch the profile page whose HTML is scraped below.
s = session.get('https://github.com/[USERNAME]')

soup = BeautifulSoup(s.text, 'html.parser')
# find() returns None when no element carries this class; the find_all()
# call below would then raise AttributeError.
results = soup.find(class_='js-pinned-items-reorder-container')
# A class_ string containing several space-separated classes only matches
# elements whose class attribute is exactly this string, in this order.
job_elems = results.find_all(
    'div', class_='Box pinned-item-list-item d-flex p-3 width-full js-pinned-item-list-item public fork reorderable sortable-button-item')
for job_elem in job_elems:
    # Each card's title is read from its <span class="repo">; a card without
    # one makes title_elem None and the print below fail.
    title_elem = job_elem.find('span', class_='repo')
    print(title_elem.text.strip())

This is not working. Please help. In the code above, "[USERNAME]" and "[PASSWORD]" stand for the username and password of the GitHub account, respectively.


Solution

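  • Try this code. It first loads the login page to read the hidden form fields (authenticity_token, timestamp, timestamp_secret), then posts them together with login and password to https://github.com/session: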

    from bs4 import BeautifulSoup
    import requests
    
    
    # Script: log in to GitHub through its HTML sign-in form and report which
    # account (if any) the session ends up authenticated as.

    login = 'USERNAME'
    password = 'PASSWORD'

    # Browser-like User-Agent, applied to every request made by the session.
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}

    # Static fields of the sign-in form; the hidden per-page fields are
    # added after the login page has been fetched.
    data = {'login': login,
            'password': password, 'js-webauthn-support': 'supported', 'js-webauthn-iuvpaa-support': 'unsupported',
            'commit': 'Sign in'}

    with requests.Session() as sess:
        # Set the headers on the session so the GET requests carry them too,
        # not only the login POST.
        sess.headers.update(headers)

        # Load the login page: its form holds hidden inputs whose values are
        # sent back with the credentials.
        login_page = sess.get('https://github.com/login')
        html = BeautifulSoup(login_page.text, 'html.parser')

        # Copy each hidden input into the payload. Fail with a clear message
        # if the form no longer contains one, instead of an AttributeError
        # on None.
        for field in ('timestamp_secret', 'authenticity_token', 'timestamp'):
            tag = html.find('input', {'name': field})
            if tag is None:
                raise RuntimeError(
                    'Login form field {!r} not found on https://github.com/login'.format(field))
            data[field] = tag.get('value')

        # Login (the response itself is not needed; success is checked below)
        sess.post("https://github.com/session", data=data)

        # Check login: the home page names the signed-in account in a
        # <meta name="user-login"> tag. A missing tag or an empty content
        # attribute both mean the session is not authenticated.
        res = sess.get('https://github.com/')
        user_meta = BeautifulSoup(res.text, 'html.parser').find('meta', {'name': 'user-login'})
        username = user_meta.get('content') if user_meta is not None else None
        if username:
            print("You have successfully logged in as", username)
        else:
            print('Your username or password is incorrect')