Merge branch 'dev' into main

2021-09-12 15:26:20 +02:00 · 2021-09-12 15:26:20 +02:00 · 76c24041e0
parent 46b049b1f7 fec991792b
commit 76c24041e0
2 changed files with 52 additions and 29 deletions
--- a/main.py
+++ b/main.py
@ -9,23 +9,25 @@ from models import TazDownloader, TazConfiguration
 from exceptions import TazConfigurationError, TazDownloadError, TazDownloadFormatException
 # Get directory
-dir_path = os.path.dirname(os.path.realpath(__file__)) + '/'
+dir_path = os.path.dirname(os.path.realpath(__file__))
 def main(config: dict):
    # Get german date for tomorrow
    tomorrow = (datetime.now(pytz.timezone('Europe/Berlin')) + timedelta(1)).strftime('%Y_%m_%d')
+    # Define tmp/ folder
+    tmp_folder = os.path.join(dir_path, 'tmp')
    # Set log level
    try:
        logging.getLogger().setLevel(config['log_level'].upper())
    except ValueError as e:
-        logging.error(f"Could not set log level.\n{e}", exc_info=True)
+        logging.error(f"Could not set log level.\n    {e}")
    # Read download history from csv file
    try:
-        df = pd.read_csv(dir_path + 'download_history.csv', header=0)
+        df = pd.read_csv(os.path.join(dir_path, 'download_history.csv'), header=0)
    except FileNotFoundError:
        # In case, there isn't yet a csv file, create data frame with headers
        df = pd.DataFrame(
@ -42,14 +44,13 @@ def main(config: dict):
                logging.info('Tomorrow\'s newspaper was already downloaded. Execution canceled.')
                sys.exit(0)
        except Exception as e:
-            logging.error(f"Could not check whether tomorrow's newspaper has already been downloaded.\n{e}",
+            logging.error(f"Could not check whether tomorrow's newspaper has already been downloaded.\n    {e}")
-                          exc_info=True)
    # Instantiate downloader object
    try:
        taz_dl = TazDownloader(config['id'], config['password'], config['download_format'])
    except TazDownloadFormatException as e:
-        logging.error(e, exc_info=True)
+        logging.error(e)
        sys.exit(1)
    try:
@ -62,17 +63,17 @@ def main(config: dict):
        # Find newspaper which are not already downloaded
        newspaper_to_download = [n for n in newspaper_available if n not in df.file.values]
    except TazDownloadError as e:
-        logging.error(e, exc_info=True)
+        logging.error(e)
        sys.exit(1)
    # Download newspaper
    newspaper_downloaded = []
    for n in newspaper_to_download:
        try:
-            if taz_dl.download_newspaper(n):
+            if taz_dl.download_newspaper(n, tmp_folder):
                newspaper_downloaded.append(n)
        except Exception as e:
-            logging.error(f"Could not download {n}\n{e}", exc_info=True)
+            logging.error(f"Could not download {n}\n    {e}")
    # Add downloaded newspaper to download_history.csv
    try:
@ -85,26 +86,34 @@ def main(config: dict):
            )
            df = df.append(df_tmp, ignore_index=True)
        df.sort_values(by='file', ascending=False, inplace=True)
-        df.to_csv(dir_path + 'download_history.csv', index=False)
+        df.to_csv(os.path.join(dir_path, 'download_history.csv'), index=False)
    except Exception as e:
-        logging.error(f"Could not update download_history.csv\n{e}", exc_info=True)
+        logging.error(f"Could not update download_history.csv\n    {e}")
    # Move downloaded file to download folder
    newspaper_downloaded_string = "\n    ".join(newspaper_downloaded)
    if os.path.isdir(config['download_folder']):
        download_folder = \
-            config['download_folder'] if config['download_folder'].endswith('/') else config['download_folder'] + "/"
+            config['download_folder'] \
+            if config['download_folder'].endswith(os.path.sep) \
+            else config['download_folder'] + os.path.sep
        for n in newspaper_downloaded:
            try:
-                shutil.move(dir_path + 'tmp/' + n, download_folder)
+                shutil.move(os.path.join(tmp_folder, n), download_folder)
            except Exception as e:
-                logging.error(f"Could not move file to download folder \"{download_folder}\"\n{e}", exc_info=True)
+                logging.error(f"Could not move {n} to download folder \"{download_folder}\"\n    {e}")
        if newspaper_downloaded:
            logging.info(f"Downloaded\n    {newspaper_downloaded_string}\n    to {config['download_folder']}")
    else:
        logging.error(f"{config['download_folder']} does not exists.\n    {newspaper_downloaded_string}"
                      f"\n    downloaded to {tmp_folder}")
 if __name__ == '__main__':
    # Set up logging
    logging.basicConfig(
-        filename=dir_path + 'tazPlease.log',
+        filename=os.path.join(dir_path, 'tazPlease.log'),
        level=logging.ERROR,
        format='%(asctime)s - %(message)s'
    )
--- a/models.py
+++ b/models.py
@ -1,13 +1,13 @@
 import os
 import requests
 from requests.exceptions import HTTPError
-from exceptions import TazDownloadFormatException
+from exceptions import TazDownloadFormatException, TazConfigurationError, TazDownloadError
 from exceptions import TazDownloadError
 from bs4 import BeautifulSoup
 from envyaml import EnvYAML
 import argparse
 import filetype
-dir_path = os.path.dirname(os.path.realpath(__file__)) + '/'
+dir_path = os.path.dirname(os.path.realpath(__file__))
 class TazConfiguration:
@ -43,7 +43,7 @@ class TazConfiguration:
    def _load_config(self):
        # Try to load config.yaml
        try:
-            conf_yaml = EnvYAML(dir_path + 'config.yaml', dir_path + '.env')
+            conf_yaml = EnvYAML(os.path.join(dir_path, 'config.yaml'), os.path.join(dir_path, '.env'))
        except Exception as e:
            raise Exception(f"Something went wrong when reading config.yaml.\n{e}")
@ -141,17 +141,17 @@ class TazDownloader:
        except HTTPError as http_e:
            raise TazDownloadError(f"Could not scrape available newspaper editions:\n{http_e}")
-    def download_newspaper(self, taz: str, download_folder: str = dir_path + 'tmp/'):
+    def download_newspaper(self, taz: str, download_folder: str = os.path.join(dir_path, 'tmp')):
        """
-        Downloads a newspaper from dl.taz.de and stores it in tmp/
+        Downloads a newspaper from dl.taz.de and stores it in tmp folder
        """
        # Check if folder exists
        try:
-            if not os.path.isdir(dir_path):
+            if not os.path.isdir(download_folder):
-                os.mkdirs(dir_path)
+                os.makedirs(download_folder)
        except Exception as e:
-            raise TazDownloadError(f"Could find or create \"{dir_path}\":\n{e}")
+            raise TazDownloadError(f"Could find or create \"{download_folder}\":\n{e}")
        # download taz
        try:
@ -167,11 +167,25 @@ class TazDownloader:
                    }
            ) as r:
                # write response to file
-                with open(download_folder + taz, "wb") as f:
+                with open(os.path.join(download_folder, taz), "wb") as f:
                    for chunk in r.iter_content(chunk_size=8192):
                        f.write(chunk)
                # Unfortunately, the taz website does not respond with an http error code if the credentials are wrong.
                # So we have to check if the response is a pdf file or the html page with an error message.
                try:
                    if filetype.guess(os.path.join(download_folder, taz)).mime != 'application/pdf':
                        raise TazDownloadError()
                except (AttributeError, TazDownloadError) as e:
                    # Try to get the error message from the html file to put it in the log
                    with open(os.path.join(download_folder, taz), 'r') as f:
                        soup = BeautifulSoup(f.read(), 'html.parser')
                        error_displayed_on_page = soup.find('p', class_='error').text
                    if error_displayed_on_page:
                        os.remove(os.path.join(download_folder, taz))
                        raise TazDownloadError(error_displayed_on_page)
                    else:
                        os.remove(os.path.join(download_folder, taz))
                        raise TazDownloadError(e)
            return True
        except HTTPError as http_e:
-            raise TazDownloadError(f"Could not download taz:\n{http_e}")
+            raise TazDownloadError(http_e)
        except Exception as e:
            raise TazDownloadError(f"Something went wrong:\n{e}")