python web-scraping heroku dyno

How to avoid Heroku dyno sleep for my web scraper


I have written this Python web-scraper script and deployed it on Heroku. It is a continuously running script that scrapes the page once a minute, sleeping in between. The problem is that it works fine for a few minutes and then stops doing anything. I think the dyno goes to sleep. How can I prevent that? How can I keep my script running without the dyno idling, or is some other problem happening here?

import time
from bs4 import BeautifulSoup
import urllib.request
import schedule
from bs4.element import Tag

# Address of the page to scrape; read by fetch_last_updated().
# NOTE(review): 'url_here' is presumably a placeholder for the real address.
url = 'url_here'  
# NOTE(review): not referenced by any function in the visible code.
prev_news = []  # Store the previous state of news to compare changes
# "Last updated" value from the previous run of job(); job() compares it
# with the freshly fetched value to decide whether anything changed.
prev_updated = "Initial Value"
# NOTE(review): not referenced by any function in the visible code.
updated_news = []


   
def compare_variables(prev, curr):
    """
    Report whether ``curr`` differs from ``prev``.

    Prints a one-line message describing the outcome and returns ``True``
    when the two values are unequal, ``False`` when they are equal.
    """
    changed = not (prev == curr)
    print("Change detected" if changed else "No change values")
    return changed
   
def fetch_last_updated() -> str:
    """
    Download the page at the module-level ``url`` and return its
    "last updated" value.

    The response is read inside a ``with`` block so the connection is
    closed even if reading fails, and the request carries a timeout so a
    stalled server cannot block the calling loop indefinitely.

    NOTE: the code that extracts ``last_updated`` from ``soup`` was elided
    in the original post (the ``.......`` placeholder below) and has to be
    filled in before this function can run.

    Raises:
        urllib.error.URLError: if the page cannot be fetched.
        OSError: if the server does not answer within the timeout
            (the socket timeout error is an ``OSError`` subclass).
    """
    # Without a timeout urlopen() can wait forever on a stalled connection.
    with urllib.request.urlopen(url, timeout=30) as page:
        soup = BeautifulSoup(page.read(), 'html.parser')
    """
    .......
    """
    return last_updated


def fetch_latest_news() -> list:
   """
   Return the latest news items as a list.

   NOTE(review): the body was elided in the original post (it showed only
   ``...``), so ``latest_news`` is never assigned in the visible code; the
   scraping logic has to be supplied before this function can be called.
   """
   return latest_news



def value(component: Tag) -> list:
   """
   Return a list built for ``component``.

   NOTE(review): the logic that fills ``result`` was elided in the original
   post (the ``...`` placeholder below); in the visible code ``component``
   is unused and an empty list is always returned.
   """
   result = []
   """
   ...
   """
   return result

def change_link(url: "str | None") -> "str | None":
    """
    Return the (possibly rewritten) link for ``url``.

    ``None`` is passed through unchanged, so a missing link can be handed
    over without checking it first.

    NOTE: the rewriting step was elided in the original post (the ``...``
    placeholder below); as written, a non-``None`` ``url`` is returned
    unchanged.
    """
    # Identity check: ``!= None`` would be routed through a custom ``__ne__``.
    if url is None:
        return None
    """
    ...
    """
    return url
   

def job():
    """
    Run one polling cycle.

    Fetches the page's "last updated" value and, if it differs from the
    value seen on the previous cycle, reports the change and fetches the
    latest news. The module-level ``prev_updated`` is then rebound to the
    freshly fetched value so the next cycle compares against it.
    """
    # ``prev_updated`` is assigned at the end of this function. Without this
    # declaration Python treats it as a local, and the comparison below
    # raises UnboundLocalError on the very first run.
    global prev_updated

    # Fetch last updated information from DTU Official Webpage
    curr_updated = fetch_last_updated()
    print("Runnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnning")
    # Compare the two variables
    if compare_variables(prev_updated, curr_updated):
        # Changes detected in last updated information
        print("Last Updated has changed to :", curr_updated)

        # NOTE: the result is not used further in the visible code.
        curr_news = fetch_latest_news()

    # Update the previous value for the next iteration
    prev_updated = curr_updated


# Register job() with the scheduler so that it becomes due once every minute.
# NOTE: this registration runs at import time, not only when the file is
# executed as a script.
schedule.every(1).minutes.do(job)

if __name__ == "__main__":
  
  # Poll the scheduler forever: run whatever jobs are due, then sleep for
  # one second so the loop does not spin the CPU.
  while True:
   schedule.run_pending()
   time.sleep(1)

Solution

  • Maybe you could try pinging your app from an external service (e.g. Kaffeine) at regular intervals, so that the dyno keeps receiving requests and is not put to sleep for inactivity.