I'm trying to scrape a website using Scrapy. To get to the content I want, I first need to log in at the login URL (login_url below). That page contains the login form.
My code is as follows:
import scrapy
from scrapy import FormRequest
from scrapy.crawler import CrawlerProcess
from scrapy.shell import inspect_response

LOGIN_URL1 = "https://www.partslink24.com/partslink24/user/login.do"

class PartsSpider(scrapy.Spider):
    name = "parts"
    login_url = LOGIN_URL1
    start_urls = [
        login_url,
    ]

    def parse(self, response):
        form_data = {
            'accountLogin': COMPANY_ID,
            'userLogin': USERNAME,
            'loginBean.password': PASSWORD
        }
        yield FormRequest(url=self.login_url, formdata=form_data, callback=self.parse1)

    def parse1(self, response):
        inspect_response(response, self)
        print("RESPONSE: {}".format(response))

def start_scraper(vin_number):
    process = CrawlerProcess()
    process.crawl(PartsSpider)
    process.start()
But the problem is that the site checks whether the session is activated, and I get an error saying the form cannot be submitted. I can see this in the response I get back after submitting the login form.
The code on their site that performs this check is as follows:
var JSSessionChecker = {
    check: function()
    {
        if (!Ajax.getTransport())
        {
            alert('NO_AJAX_IN_BROWSER');
        }
        else
        {
            new Ajax.Request('/partslink24/checkSessionCookies.do', {
                method: 'post',
                onSuccess: function(transport)
                {
                    if (transport.responseText != 'true')
                    {
                        if (Object.isFunction(JSSessionChecker.showError)) JSSessionChecker.showError();
                    }
                },
                onFailure: function(e)
                {
                    if (Object.isFunction(JSSessionChecker.showError)) JSSessionChecker.showError();
                },
                onException: function (request, e)
                {
                    if (Object.isFunction(JSSessionChecker.showError)) JSSessionChecker.showError();
                }
            });
        }
    },
    showError: function()
    {
        var errorElement = $('sessionCheckError');
        if (errorElement)
        {
            errorElement.show();
        }
    }
};
JSSessionChecker.check();
On success, that request returns only true.
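So the page just POSTs to /partslink24/checkSessionCookies.do and expects the literal text true back. For reference, here is a rough, untested sketch of how that same check could be fired from the spider before the login form is posted (whether the server then accepts the form without a real browser is exactly what I'm not sure about; COMPANY_ID, USERNAME and PASSWORD are placeholders as above):

import scrapy
from scrapy import FormRequest

SESSION_CHECK_URL = "https://www.partslink24.com/partslink24/checkSessionCookies.do"

class SessionCheckSpider(scrapy.Spider):
    name = "parts_session_check"
    start_urls = ["https://www.partslink24.com/partslink24/user/login.do"]

    def parse(self, response):
        # Mimic the Ajax.Request the page itself makes; Scrapy's cookie
        # middleware keeps whatever session cookies the server sets here.
        yield scrapy.Request(
            SESSION_CHECK_URL,
            method="POST",
            callback=self.after_session_check,
            meta={"login_response": response},
        )

    def after_session_check(self, response):
        self.logger.info("Session check returned: %r", response.text)  # should be 'true'
        # Only now submit the login form, using the login page fetched first.
        form_data = {
            'accountLogin': COMPANY_ID,
            'userLogin': USERNAME,
            'loginBean.password': PASSWORD,
        }
        yield FormRequest.from_response(
            response.meta["login_response"],
            formdata=form_data,
            callback=self.after_login,
        )

    def after_login(self, response):
        print("RESPONSE: {}".format(response))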
Is there any way I can activate the session before submitting the form?
Thanks in advance.
EDIT
This is the error page I get when using the answer from @fam.
Please check this code.
import scrapy
from scrapy.crawler import CrawlerProcess

LOGIN_URL1 = "https://www.partslink24.com/partslink24/user/login.do"

class PartsSpider(scrapy.Spider):
    name = "parts"
    login_url = LOGIN_URL1
    start_urls = [
        login_url,
    ]

    def parse(self, response):
        form_data = {
            'loginBean.accountLogin': "COMPANY_ID",
            'loginBean.userLogin': "USERNAME",
            'loginBean.sessionSqueezeOut': "false",
            'loginBean.password': "PASSWORD",
            'loginBean.userOffsetSec': "18000",
            'loginBean.code2f': ""
        }
        yield scrapy.FormRequest.from_response(response=response, url=self.login_url, formdata=form_data, callback=self.parse1)

    def parse1(self, response):
        # from scrapy.shell import inspect_response
        # inspect_response(response, self)
        print("RESPONSE: {}".format(response))

def start_scraper(vin_number):
    process = CrawlerProcess()
    process.crawl(PartsSpider)
    process.start()
I am not getting an error and the response is as follows:
RESPONSE: <200 https://www.partslink24.com/partslink24/user/login.do>
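Since the response URL is still login.do, I'm not sure whether the login actually went through. A small diagnostic sketch (not part of my real code) that dumps what came back in parse1:

    def parse1(self, response):
        # Quick check: which cookies did the server set, and is the body
        # still the login page or already the logged-in page?
        print("RESPONSE URL:", response.url)
        print("SET-COOKIE:", response.headers.getlist("Set-Cookie"))
        with open("after_login.html", "wb") as f:
            f.write(response.body)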
EDIT: The following code is for Selenium. It will log you into the page easily. You only need to download ChromeDriver and install Selenium.
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import time

chrome_options = Options()
# chrome_options.add_argument("--headless")
driver = webdriver.Chrome(executable_path="./chromedriver", options=chrome_options)

driver.get("https://www.partslink24.com/partslink24/user/login.do")

# enter the form fields
company_ID = "company id"
user_name = "user name"
password = "password"

company_ID_input = driver.find_element_by_xpath("//input[@name='accountLogin']")
company_ID_input.send_keys(company_ID)
time.sleep(1)

user_name_input = driver.find_element_by_xpath("//input[@name='userLogin']")
user_name_input.send_keys(user_name)
time.sleep(1)

password_input = driver.find_element_by_xpath("//input[@id='inputPassword']")
password_input.send_keys(password)
time.sleep(1)

# click the login button
click_btn = driver.find_element_by_xpath("//a[@tabindex='5']")
click_btn.click()
time.sleep(5)
Don't forget to change the credentials.
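If the Selenium login works, the resulting session should in principle be reusable outside the browser, since it lives in cookies. A rough sketch of copying them into a requests session (my assumption, not part of the answer above):

import requests

# After click_btn.click() has logged us in, copy the browser's cookies
# into a requests.Session and continue scraping without Selenium.
session = requests.Session()
for cookie in driver.get_cookies():
    session.cookies.set(cookie["name"], cookie["value"], domain=cookie.get("domain"))

resp = session.get("https://www.partslink24.com/partslink24/user/login.do")
print(resp.status_code)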