I have written this Python web-scraper script and deployed it on Heroku. It is a continuously running script that scrapes the page after each minute of sleeping. The problem is that it works fine for a few minutes and then stops doing anything. I think the dyno goes to sleep. How can I prevent that? How can I keep my script running without the dyno idling, or is some other problem happening here?
import time
from bs4 import BeautifulSoup
import urllib.request
import schedule
from bs4.element import Tag
# Page to scrape; read by fetch_last_updated().
url = 'url_here'
prev_news = [] # Store the previous state of news to compare changes
# Last "last updated" value seen; job() compares each freshly fetched value
# against it. Starts with a sentinel string so the first fetch differs.
prev_updated = "Initial Value"
# NOTE(review): not referenced anywhere in the visible code -- confirm whether
# the elided sections use it.
updated_news = []
def compare_variables(prev, curr):
    """Report whether ``curr`` differs from ``prev``.

    Prints a one-line verdict and returns ``True`` when the two values are
    not equal, ``False`` when they are equal.
    """
    changed = not (prev == curr)
    print("Change detected" if changed else "No change values")
    return changed
def fetch_last_updated(timeout: float = 30.0) -> str:
    """Download the page at the module-level ``url`` and return its
    "last updated" value.

    Args:
        timeout: Seconds to wait on the connection before giving up.
            Without a timeout ``urlopen`` can block indefinitely on a
            stalled connection, which would freeze the scheduler loop.

    Raises:
        urllib.error.URLError: (an ``OSError`` subclass) on network failure
            or timeout.
    """
    # The context manager closes the HTTP response even if parsing raises.
    with urllib.request.urlopen(url, timeout=timeout) as page:
        soup = BeautifulSoup(page.read(), 'html.parser')
    # NOTE(review): the extraction of ``last_updated`` from ``soup`` is elided
    # in this excerpt; the placeholder below stands in for it.
    """
    .......
    """
    return last_updated
def fetch_latest_news() -> list:
    # Returns ``latest_news``; called by job() after a change is detected.
    # NOTE(review): the code that builds ``latest_news`` is elided in this
    # excerpt (the placeholder string below stands in for it), so as shown
    # the name is undefined -- confirm against the full file.
    """
    ...
    """
    return latest_news
def value(component: Tag) -> list:
    """Return the list ``result`` built for ``component``.

    NOTE(review): the code that fills ``result`` is elided in this excerpt;
    as shown the function returns an empty list.
    """
    result = []
    # Placeholder for the elided body.
    """
    ...
    """
    return result
def change_link(url: str) -> str:
    """Return the (possibly rewritten) link, or ``None`` when ``url`` is ``None``.

    NOTE(review): the rewriting logic is elided in this excerpt; the
    placeholder string below stands in for it.
    """
    # Identity check: ``!= None`` goes through ``__ne__`` and can misjudge
    # objects that override comparison; ``is None`` cannot.
    if url is None:
        return None
    """
    ...
    """
    return url
def job():
    """Run one polling cycle: fetch the page's "last updated" value and, if
    it differs from the previously seen one, fetch the latest news.

    Reads and updates the module-level ``prev_updated`` so the next cycle
    compares against the value seen here.
    """
    # Without this declaration the assignment at the bottom makes
    # ``prev_updated`` a local name, and reading it in the comparison raises
    # UnboundLocalError on the very first run -- which kills the scheduler.
    global prev_updated

    # Fetch last updated information from DTU Official Webpage
    try:
        curr_updated = fetch_last_updated()
    except OSError as exc:
        # urllib reports network failures as URLError (an OSError subclass).
        # A transient failure should skip this cycle, not stop the script.
        print("Fetch failed, will retry on next run:", exc)
        return
    print("Runnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnning")

    # Compare the two variables
    if compare_variables(prev_updated, curr_updated):
        # Changes detected in last updated information
        print("Last Updated has changed to :", curr_updated)
        # NOTE(review): the result is not used in the visible code.
        curr_news = fetch_latest_news()
        # Update the previous value for the next iteration
        prev_updated = curr_updated
# Register job() to run once every minute. This sits outside the __main__
# guard, so the registration happens whenever the module is loaded.
schedule.every(1).minutes.do(job)
if __name__ == "__main__":
    # Poll the scheduler once a second, forever. The first job() call happens
    # one minute after start-up, not immediately.
    # NOTE(review): an exception escaping job() presumably propagates out of
    # run_pending() and ends this loop -- confirm against the schedule docs.
    while True:
        schedule.run_pending()
        time.sleep(1)
Maybe you could try pinging your app from an external service (e.g. Kaffeine) at regular intervals.