I am trying to run my spider on Scrapinghub, and when I run it I get an error:
Traceback (most recent call last):
File "/usr/local/lib/python3.6/site-packages/twisted/internet/defer.py", line 1418, in _inlineCallbacks
result = g.send(result)
File "/usr/local/lib/python3.6/site-packages/scrapy/crawler.py", line 80, in crawl
self.engine = self._create_engine()
File "/usr/local/lib/python3.6/site-packages/scrapy/crawler.py", line 105, in _create_engine
return ExecutionEngine(self, lambda _: self.stop())
File "/usr/local/lib/python3.6/site-packages/scrapy/core/engine.py", line 70, in __init__
self.scraper = Scraper(crawler)
File "/usr/local/lib/python3.6/site-packages/scrapy/core/scraper.py", line 71, in __init__
self.itemproc = itemproc_cls.from_crawler(crawler)
File "/usr/local/lib/python3.6/site-packages/scrapy/middleware.py", line 53, in from_crawler
return cls.from_settings(crawler.settings, crawler)
File "/usr/local/lib/python3.6/site-packages/scrapy/middleware.py", line 35, in from_settings
mw = create_instance(mwcls, settings, crawler)
File "/usr/local/lib/python3.6/site-packages/scrapy/utils/misc.py", line 144, in create_instance
return objcls(*args, **kwargs)
File "/app/__main__.egg/skripsi/pipelines.py", line 19, in __init__
File "/app/__main__.egg/skripsi/pipelines.py", line 29, in create_connection
File "/app/python/lib/python3.6/site-packages/mysql/connector/__init__.py", line 173, in connect
return MySQLConnection(*args, **kwargs)
File "/app/python/lib/python3.6/site-packages/mysql/connector/connection.py", line 104, in __init__
self.connect(**kwargs)
File "/app/python/lib/python3.6/site-packages/mysql/connector/abstracts.py", line 780, in connect
self._open_connection()
File "/app/python/lib/python3.6/site-packages/mysql/connector/connection.py", line 284, in _open_connection
self._socket.open_connection()
File "/app/python/lib/python3.6/site-packages/mysql/connector/network.py", line 532, in open_connection
errno=2003, values=(self.get_address(), _strioerror(err)))
mysql.connector.errors.InterfaceError: 2003: Can't connect to MySQL server on '127.0.0.1:3306' (111 Connection refused)
I have tried adding mysql-connector-python to requirements.txt and configuring my dependencies in scrapinghub.yml like this:
my requirements.txt
mysql-connector-python
my scrapinghub.yml
projects:
default: 396892
stacks:
default: scrapy:1.6-py3
requirements:
file: requirements.txt
My pipelines.py
import mysql.connector
class SkripsiPipeline(object):
    """Scrapy item pipeline that stores scraped news items in a MySQL table.

    NOTE(review): the database host must be reachable from wherever the
    spider runs. Scrapy Cloud does not provide a local MySQL server, so a
    host of '127.0.0.1' fails there with "2003 ... Connection refused" —
    pass the address of an externally reachable database instead.
    """

    def __init__(self, host='127.0.0.1', user='root', password='', database='news'):
        """Open the database connection when the pipeline is created.

        All connection parameters default to the original hard-coded
        values, so Scrapy's no-argument instantiation keeps working, but
        they can now be overridden (e.g. from a subclass or settings).
        """
        self.create_connection(host=host, user=user,
                               password=password, database=database)

    def create_connection(self, host='127.0.0.1', user='root', password='', database='news'):
        """Connect to MySQL and create the cursor used for inserts.

        Raises mysql.connector.errors.InterfaceError (error 2003) when
        the server at `host` is unreachable.
        """
        self.conn = mysql.connector.connect(
            host=host,
            password=password,
            user=user,
            database=database,
        )
        self.curr = self.conn.cursor()

    def process_item(self, item, spider):
        """Persist each scraped item, then pass it down the pipeline."""
        self.store_db(item)
        return item

    def store_db(self, item):
        """Insert one item into news_tb via a parameterized query.

        Each field is taken as `item[key][0]` because the spider yields
        one-element lists for every field.
        """
        self.curr.execute("INSERT INTO news_tb (url, title, author, time, crawl_time, imagelink, content) values (%s,%s,%s,%s,%s,%s,%s)", (
            item['url'][0],
            item['title'][0],
            item['author'][0],
            item['time'][0],
            item['crawl_time'][0],
            item['imagelink'][0],
            item['content'][0],
        ))
        self.conn.commit()

    def close_spider(self, spider):
        """Called by Scrapy when the spider closes.

        The original pipeline never released the cursor or connection;
        close both here so the server-side resources are freed even if
        closing the cursor fails.
        """
        try:
            self.curr.close()
        finally:
            self.conn.close()
This is the error I encountered when running my spider on Scrapinghub. If anyone is familiar with this problem, please let me know.
Thanks.
It's not possible at all, because Scrapy Cloud doesn't provide any SQL support. You are trying to connect to 127.0.0.1 — that is localhost, which means MySQL would have to be installed and running on Scrapy Cloud itself. That's impossible. What I would recommend is to run MySQL somewhere on the web and connect to it by domain name or a public IP address.