Mar-10-2024, 09:18 AM
Hello. I have the follwoing Python code that checks a website for change. This script always gives me an error "Error checking website". What am I doing wrong?
import requests
import os
from bs4 import BeautifulSoup
import time
import logging
import smtplib as smtp
URL_TO_MONITOR = "https://www.yahoo.com/" #change this to the URL you want to monitor
DELAY_TIME = 15 # seconds
def process_html(string):
soup = BeautifulSoup(string, features="lxml")
# make the html look good
soup.prettify()
# remove script tags
for s in soup.select('script'):
s.extract()
# remove meta tags
for s in soup.select('meta'):
s.extract()
# convert to a string, remove '\r', and return
return str(soup).replace('\r', '')
def webpage_was_changed():
"""Returns true if the webpage was changed, otherwise false."""
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36',
'Pragma': 'no-cache', 'Cache-Control': 'no-cache'}
response = requests.get(URL_TO_MONITOR, headers=headers)
# create the previous_content.txt if it doesn't exist
if not os.path.exists("previous_content.txt"):
open("previous_content.txt", 'w+').close()
filehandle = open("previous_content.txt", 'r')
previous_response_html = filehandle.read()
filehandle.close()
processed_response_html = process_html(response.text)
if processed_response_html == previous_response_html:
return False
else:
filehandle = open("previous_content.txt", 'w')
filehandle.write(processed_response_html)
filehandle.close()
return True
def main():
log = logging.getLogger(__name__)
logging.basicConfig(level=os.environ.get("LOGLEVEL", "INFO"), format='%(asctime)s %(message)s')
log.info("Running Website Monitor")
while True:
try:
if webpage_was_changed():
log.info("WEBPAGE WAS CHANGED.")
print("The website was changed")
else:
log.info("Webpage was not changed.")
except:
log.info("Error checking website.")
time.sleep(DELAY_TIME)
if __name__ == "__main__":
main()
