From 2af37f96a469b172d7bc3e76568ae66e1e1aced6 Mon Sep 17 00:00:00 2001 From: Marc Michalsky Date: Thu, 9 Sep 2021 20:33:20 +0200 Subject: [PATCH 1/2] =?UTF-8?q?=E2=9C=A8=EF=B8=8F=20use=20download=5Fhisto?= =?UTF-8?q?ry.csv=20instead=20of=20lock=20file=20to=20limit=20queries?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- example_config.yaml | 7 ++++--- main.py | 38 +++++++++++--------------------------- models.py | 4 ++-- 3 files changed, 17 insertions(+), 32 deletions(-) diff --git a/example_config.yaml b/example_config.yaml index e5ac1e2..504e9e1 100644 --- a/example_config.yaml +++ b/example_config.yaml @@ -11,9 +11,10 @@ download_format: "pdf" # Where should the downloaded files be stored? download_folder: "/path/to/download/folder" -# Use a lock file that indicates whether tomorrow's newspaper has already been downloaded to limit the number of times -# the taz.de website is queried for new editions -use_lock_file: True +# Before the program searches for new download candidates, it checks whether tomorrow's newspaper has already been +# downloaded, in order to limit the number of queries on the taz.de website. +# If you want to download all available newspapers missing from download_history.csv, set this value to False. +limit_requests: True # Set the log level. # Valid formats are: notset, debug, info, warning, error, critical diff --git a/main.py b/main.py index 79b82bc..3992cfc 100644 --- a/main.py +++ b/main.py @@ -23,22 +23,6 @@ def main(config: dict): except ValueError as e: logging.error(f"Could not set log level.\n{e}", exc_info=True) - # If 'use_lock_file' configuration is set, check if lockfile exists for tomorrow's newspaper - if config['use_lock_file']: - try: - lock_files = [entry for entry in os.listdir(dir_path) if os.path.isfile(entry) and entry.endswith('.lock')] - # Delete all lock files that do not refer to tomorrow's date - for file in lock_files: - if not file.startswith('.' 
+ tomorrow): - os.remove(dir_path + file) - # If there is a lock file for tomorrow, exit the program - for file in lock_files: - if file.startswith('.' + tomorrow): - logging.info('Tomorrow\'s newspaper was already downloaded. Execution canceled.') - sys.exit(0) - except Exception as e: - logging.error(f"Could not check for lock files.\n{e}", exc_info=True) - # Read download history from csv file try: df = pd.read_csv(dir_path + 'download_history.csv', header=0) @@ -51,6 +35,16 @@ def main(config: dict): ] ) + # If the 'limit_requests' argument is specified, check whether tomorrow's newspaper has already been downloaded + if config['limit_requests']: + try: + if any(df.file.str.contains(pat=tomorrow)): + logging.info('Tomorrow\'s newspaper was already downloaded. Execution canceled.') + sys.exit(0) + except Exception as e: + logging.error(f"Could not check whether tomorrow's newspaper has already been downloaded.\n{e}", + exc_info=True) + # Instantiate downloader object try: taz_dl = TazDownloader(config['id'], config['password'], config['download_format']) @@ -59,7 +53,7 @@ def main(config: dict): sys.exit(1) try: - # Get newspapers available for download + # Get newspaper available for download newspaper_available = taz_dl.scrape_newspaper() # Remove outdated newspaper from download_history.csv @@ -80,16 +74,6 @@ def main(config: dict): except Exception as e: logging.error(f"Could not download {n}\n{e}", exc_info=True) - # Create lock file for tomorrow - if config['use_lock_file']: - try: - lock_file = '.' 
+ tomorrow + '.lock' - for n in newspaper_downloaded: - if n.startswith('taz_' + tomorrow): - os.mknod(dir_path + lock_file) - except Exception as e: - logging.error(f"Could not download create lock file \"{lock_file}\"\n{e}", exc_info=True) - # Add downloaded newspaper to download_history.csv try: for n in newspaper_downloaded: diff --git a/models.py b/models.py index 3396363..26cc6cc 100644 --- a/models.py +++ b/models.py @@ -25,7 +25,7 @@ class TazConfiguration: ('password', True), ('download_format', False), ('download_folder', True), - ('use_lock_file', False), + ('limit_requests', False), ('log_level', False), ] @@ -94,7 +94,7 @@ class TazConfiguration: ) argparser.add_argument( '-l', - '--use_lock_file', + '--limit-requests', action='store_true', default=None ) From dfa721e7c0ce12525a61692b77b0ea9778837301 Mon Sep 17 00:00:00 2001 From: Marc Michalsky Date: Thu, 9 Sep 2021 20:53:19 +0200 Subject: [PATCH 2/2] =?UTF-8?q?=F0=9F=93=9Dadd=20help=20text=20to=20argume?= =?UTF-8?q?nts?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- models.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/models.py b/models.py index 26cc6cc..ffb1571 100644 --- a/models.py +++ b/models.py @@ -65,19 +65,21 @@ class TazConfiguration: Parse command line arguments. 
""" argparser = argparse.ArgumentParser( - description='Download taz e-paper' + description='Download taz e-paper', ) argparser.add_argument( '-i', '--id', action='store', type=str, + help='Your taz-ID', ) argparser.add_argument( '-p', '--password', action='store', type=str, + help='Your password', ) argparser.add_argument( '-f', @@ -85,23 +87,27 @@ class TazConfiguration: action='store', type=str, choices=['pdf', 'epub', 'epubt', 'html', 'ascii', 'mobi', 'mobit'], + help='The e-paper format', ) argparser.add_argument( '-d', '--download_folder', action='store', type=str, + help='The path to a folder where the e-paper should be stored', ) argparser.add_argument( '-l', '--limit-requests', action='store_true', - default=None + default=None, + help='Only query website for available newspaper if tomorrow\'s newspaper has not already been downloaded', ) argparser.add_argument( '--log_level', action='store', choices=['notset', 'debug', 'info', 'warning', 'error', 'critical'], + help='Set the log level', ) return argparser.parse_args()