import argparse
import csv
import json
import logging
import re
import sys
from datetime import datetime
from pathlib import Path
from time import sleep

import requests
import yaml
from platformdirs import user_config_dir, user_data_dir
from requests import HTTPError
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.select import Select
from validators import url as validate_url

from .logger import setup_logging

PROJECT_ROOT = Path(__file__).parent.parent
logger = logging.getLogger(__package__)


def parse_arguments() -> dict:
    """
    Parse the command line arguments.

    Every option is optional; options that are not given are present in the
    result with the value ``None``.

    :return: dict mapping the argparse destination names (``services``,
        ``locations``, ``config_file``, ``csv_file``, ``log_file``,
        ``log_level``) to their parsed values
    """
    argparser = argparse.ArgumentParser(
        prog="cgn-appointments",
        description="Scrapes appointments from termine.stadt-koeln.de and sends a message to a ntfy server.",
    )
    argparser.add_argument(
        "-s", "--services",
        action="store",
        nargs='+',
        type=str,
        help="Services to check",
        required=False,
    )
    argparser.add_argument(
        "-l", "--locations",
        action="store",
        nargs='+',
        type=str,
        help="Locations to check",
        required=False,
    )
    argparser.add_argument(
        "--config-file",
        action="store",
        type=Path,
        help="Path to the configuration file",
        required=False,
    )
    argparser.add_argument(
        "--csv-file",
        action="store",
        type=Path,
        help="Path to the csv file, which stores the last fetched dates",
        required=False,
    )
    argparser.add_argument(
        "--log-file",
        action="store",
        type=Path,
        help="Path to logfile",
        required=False,
    )
    argparser.add_argument(
        "--log-level",
        action="store",
        type=str,
        choices=["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG", "NOTSET"],
        help="Logging Level",
        required=False,
    )
    return vars(argparser.parse_args())


def update_config_with_args(config: dict, args: dict) -> dict:
    """
    Update the configuration with the command line arguments.

    Only arguments that are not ``None`` override the configuration. The
    passed ``config`` dict is modified in place and also returned.

    :param config: configuration as loaded from the config file
    :param args: parsed arguments as returned by ``parse_arguments``
    :return: dict, the updated configuration
    :raises KeyError: if ``log_file`` / ``log_level`` is given but the
        configuration has no ``logging.handlers.file`` /
        ``logging.loggers.root`` section to store it in
    """
    update_config = {
        "services": args.get("services"),
        "locations": args.get("locations"),
        "csv_path": args.get("csv_file"),
    }
    for key, value in update_config.items():
        if value is not None:
            config[key] = value

    if args.get("log_file") is not None:
        config["logging"]["handlers"]["file"]["filename"] = args.get("log_file")
    if args.get("log_level") is not None:
        config["logging"]["loggers"]["root"]["level"] = args.get("log_level")
    return config


def get_config() -> dict:
    """
    Get the configuration from the config.yaml file.

    The file given with ``--config-file`` is used if present, otherwise
    ``<user config dir>/cgn-appointments/config.yaml``. If the file does not
    exist, it is created from ``config_template.yaml`` (located next to this
    module) and the program exits with status 0 so the user can fill it in.
    If the file cannot be read as a YAML mapping, the program exits with
    status 1.

    :return: dict, the configuration with command line overrides applied
    """
    args = parse_arguments()
    config_yaml = args.get("config_file")
    if config_yaml is None:
        config_yaml = Path(user_config_dir()) / "cgn-appointments" / "config.yaml"

    if not config_yaml.exists():
        print(f"config.yaml not found. Creating a new one under '{config_yaml}'. Please fill in the required information.")
        config_yaml.parent.mkdir(parents=True, exist_ok=True)
        template = Path(__file__).parent / "config_template.yaml"
        config_yaml.write_text(template.read_text())
        sys.exit(0)

    try:
        with open(config_yaml, "r") as file:
            loaded = yaml.safe_load(file)
    except FileNotFoundError:
        # The file may have been removed between the exists() check and open().
        print(f"config.yaml not found in '{config_yaml}'.")
        sys.exit(1)

    # safe_load returns None for an empty file; dict(None) would raise TypeError.
    if not isinstance(loaded, dict):
        print(f"config.yaml in '{config_yaml}' is empty or not a mapping.")
        sys.exit(1)

    # Replace config values with given arguments
    return update_config_with_args(dict(loaded), args)


def define_csv_path(csv_path: str | Path | None, csv_name: str | Path | None) -> Path:
    """
    Define the path to the csv file.

    Resolution order:

    * ``csv_path`` is an existing directory: ``csv_path / csv_name`` (or
      ``csv_path / "cgn-appointments.csv"`` without a name); nothing is
      created in this case.
    * ``csv_path`` is anything else: it is used as the file itself.
    * only ``csv_name`` is given: ``<user data dir> / csv_name``.
    * neither is given:
      ``<user data dir>/cgn-appointments/cgn-appointments.csv``.

    In the last three cases missing parent directories and the file itself
    are created.

    :param csv_path: directory or file path, or a falsy value
    :param csv_name: file name, or a falsy value
    :return: Path
    """
    default_name = "cgn-appointments.csv"
    path = Path(csv_path) if csv_path else None
    name = Path(csv_name) if csv_name else None

    if path is not None and path.is_dir():
        return path / (name if name is not None else default_name)

    if path is not None:
        target = path
    elif name is not None:
        target = Path(user_data_dir()) / name
    else:
        target = Path(user_data_dir()) / "cgn-appointments" / default_name

    # touch() fails if the parent directory is missing, so create it first.
    target.parent.mkdir(parents=True, exist_ok=True)
    target.touch()
    return target


def select_options(services, selects):
    """
    Select the option at index 1 of the matching service dropdowns.

    A dropdown matches when the text of its grandparent element contains at
    least one of the given service names and its ``data-testid`` attribute
    starts with ``service--``.

    :param services: iterable of service names to look for
    :param selects: ``<select>`` web elements to inspect
    :return: None
    """
    for select in selects:
        parents = select.find_element(By.XPATH, "../..").text
        available_services = [service for service in services if service in parents]
        if not available_services:
            continue

        logger.debug(f"Selecting option for {', '.join(available_services)}")
        for attr in select.get_property('attributes'):
            if attr['name'] == 'data-testid' and attr['value'].startswith("service--"):
                Select(select).select_by_index(1)
                # An element carries a given attribute only once.
                break


def ntfy(server: str, topic: str, title: str, message: str, click_url: str, tags: list | None = None,
         priority: int = 3, timeout: float = 30.0) -> None:
    """
    Send a notification to the ntfy server.

    The notification is posted as a JSON document to ``server``. An HTTP
    error status is logged and not raised; other request errors (connection
    failure, timeout) propagate to the caller.

    :param server: URL the JSON document is posted to
    :param topic: ntfy topic
    :param title: notification title
    :param message: notification message
    :param click_url: URL sent as the ``click`` field
    :param tags: tags sent as the ``tags`` field
    :param priority: value sent as the ``priority`` field
    :param timeout: seconds to wait for the server before giving up
    :return: None
    """
    data = {
        "topic": topic,
        "title": title,
        "message": message,
        "tags": tags,
        "click": click_url,
        "priority": priority
    }
    logger.debug(f"Sending notification to '{server}'.", extra={"ntfy": {"server": server} | data})
    try:
        # Without a timeout requests waits indefinitely for an unresponsive server.
        response = requests.post(
            server,
            data=json.dumps(data),
            timeout=timeout,
        )
        response.raise_for_status()
    except HTTPError as e:
        logger.error(f"HTTP error occurred: {e}", extra={"ntfy": {"server": server} | data})


def write_csv(csv_path, lines):
    """
    Write the lines to a csv file, replacing its previous content.

    I/O and csv errors are logged and not raised.

    :param csv_path: path of the csv file
    :param lines: iterable of rows, each an iterable of fields
    :return: None
    """
    logger.debug("Writing to csv file.", extra={"csv_path": csv_path, "lines": lines})
    try:
        # newline="" is required by the csv module; without it extra blank
        # rows are written on platforms that translate "\n" to "\r\n".
        with open(csv_path, mode="w", newline="") as file:
            csv.writer(file).writerows(lines)
    except (OSError, csv.Error) as e:
        logger.error(f"Error writing to csv file: {e}", exc_info=True, extra={"csv_path": csv_path, "lines": lines})


def _abort(message: str) -> None:
    """Log ``message`` as an error and exit the program with status 1."""
    logger.error(message)
    sys.exit(1)


def _scrape_locations(url, services, check_locations, date_pattern, date_format):
    """
    Open the website in headless Chrome and collect the earliest dates.

    :param url: URL of the appointment website
    :param services: service names whose dropdowns are selected
    :param check_locations: location names to look for
    :param date_pattern: compiled regex that finds the date in the text
    :param date_format: ``strptime`` format of the matched date
    :return: tuple of (locations dict, session URL); each locations entry
        holds ``location_container`` and, when it could be determined,
        ``earliest`` (a datetime, or None if no appointment is offered)
    """
    locations = {}
    options = Options()
    options.add_argument("--headless=new")
    driver = webdriver.Chrome(options=options)
    try:
        # Open the website
        logger.debug(f"Opening website: {url}")
        driver.get(url)

        # Start a new session if the previous one expired
        session_expired = driver.find_element(By.ID, "page_title").text == "Ihre Sitzung ist abgelaufen."
        if session_expired:
            logger.debug("Session expired. Starting a new session.")
            new_session_button = driver.find_element(By.CLASS_NAME, "button")
            new_session_button.click()

        # Select services
        selects = driver.find_elements(By.TAG_NAME, 'select')
        select_options(services, selects)

        # Click next button
        next_button = driver.find_element(By.TAG_NAME, "button")
        next_button.click()

        # Wait for the page to load
        logger.debug("Waiting 10 seconds for the page to load...")
        sleep(10)

        # Get location containers
        location_containers = driver.find_elements(By.CLASS_NAME, "location-container")
        for loc in check_locations:
            for location_container in location_containers:
                loc_title = location_container.find_element(By.CLASS_NAME, "location_title")
                if loc in loc_title.text:
                    locations[loc] = {"location_container": location_container}

        if locations:
            logger.debug("Location containers found", extra={"locations": locations})
        else:
            logger.warning("No location containers found.")

        # Get earliest date for each location
        for loc, data in locations.items():
            date_text = data["location_container"].find_element(By.CLASS_NAME, "earliest").text
            found_date = date_pattern.search(date_text)
            if found_date:
                data["earliest"] = datetime.strptime(found_date.group(), date_format)
            elif "kein Termin gefunden" in date_text:
                data["earliest"] = None
                logger.info(f"No appointment found for {loc}.")
            else:
                logger.warning(f"Could not find date or 'kein Termin gefunden' for {loc}.",
                               extra={"location": loc, "date_text": date_text})

        # Store session URL before the browser is closed
        session_url = driver.current_url
    finally:
        # Always close the browser, otherwise a failed run leaves Chrome running.
        driver.quit()
        logger.debug("Browser closed.")
    return locations, session_url


def _read_previous_dates(csv_path, check_locations, date_format) -> dict:
    """
    Read the previously stored dates of the configured locations.

    Blank rows are skipped. Any other row that does not have exactly two
    fields ends the program with status 1. Rows of locations that are not in
    ``check_locations`` are ignored.

    :param csv_path: path of the csv file; created if missing
    :param check_locations: location names to keep
    :param date_format: ``strptime`` format of the stored dates
    :return: dict mapping location name to datetime
    """
    csv_path.touch()
    with open(csv_path, mode="r", newline="") as file:
        csv_file = list(csv.reader(file))
    logger.debug("Read csv file", extra={"csv_path": csv_path, "csv_file": csv_file})

    previous_dates = {}
    for n, line in enumerate(csv_file):
        if not line:
            # Blank rows appear in files written without newline="".
            continue
        if len(line) != 2:
            logger.error("Invalid line in csv file.", extra={"line_number": n, "line": line})
            sys.exit(1)
        if line[0] in check_locations:
            previous_dates[line[0]] = datetime.strptime(line[1], date_format)
    return previous_dates


def main():
    """
    Main function.

    Loads and validates the configuration, scrapes the earliest appointment
    date of every configured location, compares it with the date stored in
    the csv file, sends a ntfy notification for every changed date and
    writes the current state back to the csv file. Invalid configuration
    ends the program with status 1.

    :return: None
    """
    # Get the configuration
    config = get_config()

    # Set up logging
    setup_logging(config.get("logging"))

    # Set the variables
    url = config.get("url")
    services = config.get("services")
    check_locations = config.get("locations")
    csv_name = config.get("csv_name", "cgn-appointments.csv")
    csv_path = define_csv_path(config.get("csv_path"), csv_name)
    date_regex = config.get("date_regex")
    date_format = config.get("date_format")
    # A missing "ntfy" section must lead to the validation errors below,
    # not to an AttributeError on None.
    ntfy_config = config.get("ntfy") or {}
    ntfy_server = ntfy_config.get("server")
    ntfy_topic = ntfy_config.get("topic")
    ntfy_title = ntfy_config.get("title")
    ntfy_message = ntfy_config.get("message")
    ntfy_tags = ntfy_config.get("tags")
    ntfy_priority = ntfy_config.get("priority")

    # Validate data
    if not url or not validate_url(url):
        _abort(f"Invalid URL '{url}'.")
    if not services:
        _abort("No services defined.")
    if not check_locations:
        _abort("No locations defined.")
    if not date_regex:
        _abort("No date regex defined.")
    try:
        date_pattern = re.compile(date_regex)
    except re.error:
        _abort(f"Invalid date regex '{date_regex}'.")
    if not date_format:
        _abort("No date format defined.")
    if not ntfy_server or not validate_url(ntfy_server):
        _abort(f"Invalid ntfy server '{ntfy_server}'.")
    if not ntfy_topic:
        _abort("No ntfy topic defined.")

    locations, session_url = _scrape_locations(url, services, check_locations, date_pattern, date_format)

    # Read previous data from file
    previous_dates = _read_previous_dates(csv_path, check_locations, date_format)

    # Compare previous and new dates and send notification if new date is different
    lines = []
    for name, data in locations.items():
        previous_date = previous_dates.get(name)
        new_date = data.get("earliest")
        if new_date and new_date != previous_date:
            logger.info(f"New appointment found for {name}: {new_date}",
                        extra={"location": name, "previous_date": previous_date, "new_date": new_date})
            lines.append((name, new_date.strftime(date_format)))
            ntfy(
                ntfy_server,
                ntfy_topic,
                ntfy_title,
                ntfy_message % (name, new_date),
                session_url,
                ntfy_tags,
                ntfy_priority,
            )
        elif previous_date is not None:
            lines.append((name, previous_date.strftime(date_format)))

    # Keep the stored dates of configured locations that were not found on
    # the page in this run, so they are not lost when the file is rewritten.
    for name, previous_date in previous_dates.items():
        if name not in locations:
            lines.append((name, previous_date.strftime(date_format)))

    # Write new data to file
    write_csv(csv_path, lines)


if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        logger.error(f"An error occurred: {e}", exc_info=True)
        sys.exit(1)