I have been working on a small project which is a web-crawler template. Im having an issue in pycharm where I am getting a warning Unresolved attribute reference 'domain' for class 'Scraper'
from abc import abstractmethod
import requests
import tldextract
class Scraper:
scrapers = {}
def __init_subclass__(scraper_class):
Scraper.scrapers[scraper_class.domain] = scraper_class # Unresolved attribute reference 'domain' for class 'Scraper'
@classmethod
def for_url(cls, url):
k = tldextract.extract(url)
# Returns -> <scraper.SydsvenskanScraper object at 0x000001E94F135850> & Scraped BBC News<!DOCTYPE html><html Which type annotiation?
return cls.scrapers[k.registered_domain](url)
@abstractmethod
def scrape(self):
pass
class BBCScraper(Scraper):
domain = 'bbc.co.uk'
def __init__(self, url):
self.url = url
def scrape(self):
rep = requests.Response = requests.get(self.url)
return "Scraped BBC News" + rep.text[:20] # ALL HTML CONTENT
class SydsvenskanScraper(Scraper):
domain = 'sydsvenskan.se'
def __init__(self, url):
self.url = url
def scrape(self):
rep = requests.Response = requests.get(self.url)
return "Scraped Sydsvenskan News" + rep.text[:20] # ALL HTML CONTENT
if __name__ == "__main__":
URLS = ['https://www.sydsvenskan.se/', 'https://www.bbc.co.uk/']
for urls in URLS:
get_product = Scraper.for_url(urls)
r = get_product.scrape()
print(r)
Of course I could ignore it as it is working but I do not like to ignore a warning as I believe pycharm is smart and should solve the warning rather than ignoring it and I wonder what is the reason of it warns me regarding that?
There are a few different levels on how you can remove this warning:
class Scraper:
scrapers = {}
domain = None # Or a sensible value of one exists
from typing import ClassVar
class Scraper:
scrapers: ClassVar[dict[str, 'Scraper']] = {}
domain: ClassVar[str]
Note that ClassVar
is required because otherwise it is assume that they are instance attributes.