From 70e5d574c5d20ded6e1d80802bb4b5da871cd515 Mon Sep 17 00:00:00 2001
From: Marc Michalsky <m.michalsky@posteo.de>
Date: Sun, 12 Sep 2021 15:19:56 +0200
Subject: [PATCH 1/3] =?UTF-8?q?=E2=9C=A8=20use=20os.path.join()=20instead?=
 =?UTF-8?q?=20of=20string=20concatenation=20to=20join=20paths?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

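Building paths with os.path.join() and os.path.sep instead of hard-coded '/' separators should make the program compatible with operating systems that use a different path separator.
It also fixes the folder check in download_newspaper(), which tested dir_path instead of the download folder and called the non-existent os.mkdirs() instead of os.makedirs().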
---
 main.py   | 20 ++++++++++++--------
 models.py | 16 ++++++++--------
 2 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/main.py b/main.py
index 3992cfc..dffa08b 100644
--- a/main.py
+++ b/main.py
@@ -9,14 +9,16 @@ from models import TazDownloader, TazConfiguration
 from exceptions import TazConfigurationError, TazDownloadError, TazDownloadFormatException
 
 # Get directory
-dir_path = os.path.dirname(os.path.realpath(__file__)) + '/'
+dir_path = os.path.dirname(os.path.realpath(__file__))
 
 
 def main(config: dict):
-
     # Get german date for tomorrow
     tomorrow = (datetime.now(pytz.timezone('Europe/Berlin')) + timedelta(1)).strftime('%Y_%m_%d')
 
+    # Define tmp/ folder
+    tmp_folder = os.path.join(dir_path, 'tmp')
+
     # Set log level
     try:
         logging.getLogger().setLevel(config['log_level'].upper())
@@ -25,7 +27,7 @@ def main(config: dict):
 
     # Read download history from csv file
     try:
-        df = pd.read_csv(dir_path + 'download_history.csv', header=0)
+        df = pd.read_csv(os.path.join(dir_path, 'download_history.csv'), header=0)
     except FileNotFoundError:
         # In case, there isn't yet a csv file, create data frame with headers
         df = pd.DataFrame(
@@ -69,7 +71,7 @@ def main(config: dict):
     newspaper_downloaded = []
     for n in newspaper_to_download:
         try:
-            if taz_dl.download_newspaper(n):
+            if taz_dl.download_newspaper(n, tmp_folder):
                 newspaper_downloaded.append(n)
         except Exception as e:
             logging.error(f"Could not download {n}\n{e}", exc_info=True)
@@ -85,17 +87,19 @@ def main(config: dict):
             )
             df = df.append(df_tmp, ignore_index=True)
         df.sort_values(by='file', ascending=False, inplace=True)
-        df.to_csv(dir_path + 'download_history.csv', index=False)
+        df.to_csv(os.path.join(dir_path, 'download_history.csv'), index=False)
     except Exception as e:
         logging.error(f"Could not update download_history.csv\n{e}", exc_info=True)
 
     # Move downloaded file to download folder
     if os.path.isdir(config['download_folder']):
         download_folder = \
-            config['download_folder'] if config['download_folder'].endswith('/') else config['download_folder'] + "/"
+            config['download_folder'] \
+            if config['download_folder'].endswith(os.path.sep) \
+            else config['download_folder'] + os.path.sep
         for n in newspaper_downloaded:
             try:
-                shutil.move(dir_path + 'tmp/' + n, download_folder)
+                shutil.move(os.path.join(tmp_folder, n), download_folder)
             except Exception as e:
                 logging.error(f"Could not move file to download folder \"{download_folder}\"\n{e}", exc_info=True)
 
@@ -104,7 +108,7 @@ if __name__ == '__main__':
 
     # Set up logging
     logging.basicConfig(
-        filename=dir_path + 'tazPlease.log',
+        filename=os.path.join(dir_path, 'tazPlease.log'),
         level=logging.ERROR,
         format='%(asctime)s - %(message)s'
     )
diff --git a/models.py b/models.py
index ffb1571..81bd11c 100644
--- a/models.py
+++ b/models.py
@@ -7,7 +7,7 @@ from bs4 import BeautifulSoup
 from envyaml import EnvYAML
 import argparse
 
-dir_path = os.path.dirname(os.path.realpath(__file__)) + '/'
+dir_path = os.path.dirname(os.path.realpath(__file__))
 
 
 class TazConfiguration:
@@ -43,7 +43,7 @@ class TazConfiguration:
     def _load_config(self):
         # Try to load config.yaml
         try:
-            conf_yaml = EnvYAML(dir_path + 'config.yaml', dir_path + '.env')
+            conf_yaml = EnvYAML(os.path.join(dir_path, 'config.yaml'), os.path.join(dir_path, '.env'))
         except Exception as e:
             raise Exception(f"Something went wrong when reading config.yaml.\n{e}")
 
@@ -141,17 +141,17 @@ class TazDownloader:
         except HTTPError as http_e:
             raise TazDownloadError(f"Could not scrape available newspaper editions:\n{http_e}")
 
-    def download_newspaper(self, taz: str, download_folder: str = dir_path + 'tmp/'):
+    def download_newspaper(self, taz: str, download_folder: str = os.path.join(dir_path, 'tmp')):
         """
-        Downloads a newspaper from dl.taz.de and stores it in tmp/
+        Downloads a newspaper from dl.taz.de and stores it in tmp folder
         """
 
         # Check if folder exists
         try:
-            if not os.path.isdir(dir_path):
-                os.mkdirs(dir_path)
+            if not os.path.isdir(download_folder):
+                os.makedirs(download_folder)
         except Exception as e:
-            raise TazDownloadError(f"Could find or create \"{dir_path}\":\n{e}")
+            raise TazDownloadError(f"Could find or create \"{download_folder}\":\n{e}")
 
         # download taz
         try:
@@ -167,7 +167,7 @@ class TazDownloader:
                     }
             ) as r:
                 # write response to file
-                with open(download_folder + taz, "wb") as f:
+                with open(os.path.join(download_folder, taz), "wb") as f:
                     for chunk in r.iter_content(chunk_size=8192):
                         f.write(chunk)
             return True

From e1380d81c2d20218219d594321b45887581a7d70 Mon Sep 17 00:00:00 2001
From: Marc Michalsky <m.michalsky@posteo.de>
Date: Sun, 12 Sep 2021 15:23:01 +0200
Subject: [PATCH 2/3] =?UTF-8?q?=E2=9C=A8=20add=20error=20handling=20for=20?=
 =?UTF-8?q?wrong=20credentials?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Unfortunately, the taz website does not respond with an HTTP error code if the credentials are wrong, so we have to check whether the response is a PDF file or the HTML page with an error message.
---
 models.py | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/models.py b/models.py
index 81bd11c..8e24ef5 100644
--- a/models.py
+++ b/models.py
@@ -1,11 +1,11 @@
 import os
 import requests
 from requests.exceptions import HTTPError
-from exceptions import TazDownloadFormatException
-from exceptions import TazDownloadError
+from exceptions import TazDownloadFormatException, TazConfigurationError, TazDownloadError
 from bs4 import BeautifulSoup
 from envyaml import EnvYAML
 import argparse
+import filetype
 
 dir_path = os.path.dirname(os.path.realpath(__file__))
 
@@ -170,8 +170,22 @@ class TazDownloader:
                 with open(os.path.join(download_folder, taz), "wb") as f:
                     for chunk in r.iter_content(chunk_size=8192):
                         f.write(chunk)
+                # Unfortunately, the taz website does not respond with an http error code if the credentials are wrong.
+                # So we have to check if the response is a pdf file or the html page with an error message.
+                try:
+                    if filetype.guess(os.path.join(download_folder, taz)).mime != 'application/pdf':
+                        raise TazDownloadError()
+                except (AttributeError, TazDownloadError) as e:
+                    # Try to get the error message from the html file to put it in the log
+                    with open(os.path.join(download_folder, taz), 'r') as f:
+                        soup = BeautifulSoup(f.read(), 'html.parser')
+                        error_displayed_on_page = soup.find('p', class_='error').text
+                    if error_displayed_on_page:
+                        os.remove(os.path.join(download_folder, taz))
+                        raise TazDownloadError(error_displayed_on_page)
+                    else:
+                        os.remove(os.path.join(download_folder, taz))
+                        raise TazDownloadError(e)
             return True
         except HTTPError as http_e:
-            raise TazDownloadError(f"Could not download taz:\n{http_e}")
-        except Exception as e:
-            raise TazDownloadError(f"Something went wrong:\n{e}")
+            raise TazDownloadError(http_e)

From fec991792b321dab30e9fa7891fd2c557fb7de23 Mon Sep 17 00:00:00 2001
From: Marc Michalsky <m.michalsky@posteo.de>
Date: Sun, 12 Sep 2021 15:25:58 +0200
Subject: [PATCH 3/3] =?UTF-8?q?=F0=9F=92=84nicer=20log=20messages?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 main.py | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/main.py b/main.py
index dffa08b..a0011ce 100644
--- a/main.py
+++ b/main.py
@@ -23,7 +23,7 @@ def main(config: dict):
     try:
         logging.getLogger().setLevel(config['log_level'].upper())
     except ValueError as e:
-        logging.error(f"Could not set log level.\n{e}", exc_info=True)
+        logging.error(f"Could not set log level.\n    {e}")
 
     # Read download history from csv file
     try:
@@ -44,14 +44,13 @@ def main(config: dict):
                 logging.info('Tomorrow\'s newspaper was already downloaded. Execution canceled.')
                 sys.exit(0)
         except Exception as e:
-            logging.error(f"Could not check whether tomorrow's newspaper has already been downloaded.\n{e}",
-                          exc_info=True)
+            logging.error(f"Could not check whether tomorrow's newspaper has already been downloaded.\n    {e}")
 
     # Instantiate downloader object
     try:
         taz_dl = TazDownloader(config['id'], config['password'], config['download_format'])
     except TazDownloadFormatException as e:
-        logging.error(e, exc_info=True)
+        logging.error(e)
         sys.exit(1)
 
     try:
@@ -64,7 +63,7 @@ def main(config: dict):
         # Find newspaper which are not already downloaded
         newspaper_to_download = [n for n in newspaper_available if n not in df.file.values]
     except TazDownloadError as e:
-        logging.error(e, exc_info=True)
+        logging.error(e)
         sys.exit(1)
 
     # Download newspaper
@@ -74,7 +73,7 @@ def main(config: dict):
             if taz_dl.download_newspaper(n, tmp_folder):
                 newspaper_downloaded.append(n)
         except Exception as e:
-            logging.error(f"Could not download {n}\n{e}", exc_info=True)
+            logging.error(f"Could not download {n}\n    {e}")
 
     # Add downloaded newspaper to download_history.csv
     try:
@@ -89,9 +88,10 @@ def main(config: dict):
         df.sort_values(by='file', ascending=False, inplace=True)
         df.to_csv(os.path.join(dir_path, 'download_history.csv'), index=False)
     except Exception as e:
-        logging.error(f"Could not update download_history.csv\n{e}", exc_info=True)
+        logging.error(f"Could not update download_history.csv\n    {e}")
 
     # Move downloaded file to download folder
+    newspaper_downloaded_string = "\n    ".join(newspaper_downloaded)
     if os.path.isdir(config['download_folder']):
         download_folder = \
             config['download_folder'] \
@@ -101,7 +101,12 @@ def main(config: dict):
             try:
                 shutil.move(os.path.join(tmp_folder, n), download_folder)
             except Exception as e:
-                logging.error(f"Could not move file to download folder \"{download_folder}\"\n{e}", exc_info=True)
+                logging.error(f"Could not move {n} to download folder \"{download_folder}\"\n    {e}")
+        if newspaper_downloaded:
+            logging.info(f"Downloaded\n    {newspaper_downloaded_string}\n    to {config['download_folder']}")
+    else:
+        logging.error(f"{config['download_folder']} does not exists.\n    {newspaper_downloaded_string}"
+                      f"\n    downloaded to {tmp_folder}")
 
 
 if __name__ == '__main__':