Merge branch 'dev' into main

This commit is contained in:
Marc Koch 2021-09-09 20:55:26 +02:00
commit 46b049b1f7
3 changed files with 25 additions and 34 deletions

View File

@ -11,9 +11,10 @@ download_format: "pdf"
# Where should the downloaded files be stored?
download_folder: "/path/to/download/folder"
# Use a lock file that indicates whether tomorrow's newspaper has already been downloaded to limit the number of times
# the taz.de website is queried for new editions
use_lock_file: True
# Before the program searches for new download candidates, it checks whether tomorrow's newspaper has already been
# downloaded, in order to limit the number of queries sent to the taz.de website.
# If you want to download all available newspapers missing from download_history.csv, set this value to False.
limit_requests: True
# Set the log level.
# Valid formats are: notset, debug, info, warning, error, critical

38
main.py
View File

@ -23,22 +23,6 @@ def main(config: dict):
except ValueError as e:
logging.error(f"Could not set log level.\n{e}", exc_info=True)
# If 'use_lock_file' configuration is set, check if lockfile exists for tomorrow's newspaper
if config['use_lock_file']:
try:
lock_files = [entry for entry in os.listdir(dir_path) if os.path.isfile(entry) and entry.endswith('.lock')]
# Delete all lock files that do not refer to tomorrow's date
for file in lock_files:
if not file.startswith('.' + tomorrow):
os.remove(dir_path + file)
# If there is a lock file for tomorrow, exit the program
for file in lock_files:
if file.startswith('.' + tomorrow):
logging.info('Tomorrow\'s newspaper was already downloaded. Execution canceled.')
sys.exit(0)
except Exception as e:
logging.error(f"Could not check for lock files.\n{e}", exc_info=True)
# Read download history from csv file
try:
df = pd.read_csv(dir_path + 'download_history.csv', header=0)
@ -51,6 +35,16 @@ def main(config: dict):
]
)
# If the 'limit_requests' argument is specified, check whether tomorrow's newspaper has already been downloaded
if config['limit_requests']:
try:
if any(df.file.str.contains(pat=tomorrow)):
logging.info('Tomorrow\'s newspaper was already downloaded. Execution canceled.')
sys.exit(0)
except Exception as e:
logging.error(f"Could not check whether tomorrow's newspaper has already been downloaded.\n{e}",
exc_info=True)
# Instantiate downloader object
try:
taz_dl = TazDownloader(config['id'], config['password'], config['download_format'])
@ -59,7 +53,7 @@ def main(config: dict):
sys.exit(1)
try:
# Get newspapers available for download
# Get newspaper available for download
newspaper_available = taz_dl.scrape_newspaper()
# Remove outdated newspaper from download_history.csv
@ -80,16 +74,6 @@ def main(config: dict):
except Exception as e:
logging.error(f"Could not download {n}\n{e}", exc_info=True)
# Create lock file for tomorrow
if config['use_lock_file']:
try:
lock_file = '.' + tomorrow + '.lock'
for n in newspaper_downloaded:
if n.startswith('taz_' + tomorrow):
os.mknod(dir_path + lock_file)
except Exception as e:
logging.error(f"Could not download create lock file \"{lock_file}\"\n{e}", exc_info=True)
# Add downloaded newspaper to download_history.csv
try:
for n in newspaper_downloaded:

View File

@ -25,7 +25,7 @@ class TazConfiguration:
('password', True),
('download_format', False),
('download_folder', True),
('use_lock_file', False),
('limit_requests', False),
('log_level', False),
]
@ -65,19 +65,21 @@ class TazConfiguration:
Parse command line arguments.
"""
argparser = argparse.ArgumentParser(
description='Download taz e-paper'
description='Download taz e-paper',
)
argparser.add_argument(
'-i',
'--id',
action='store',
type=str,
help='Your taz-ID',
)
argparser.add_argument(
'-p',
'--password',
action='store',
type=str,
help='Your password',
)
argparser.add_argument(
'-f',
@ -85,23 +87,27 @@ class TazConfiguration:
action='store',
type=str,
choices=['pdf', 'epub', 'epubt', 'html', 'ascii', 'mobi', 'mobit'],
help='The e-paper format',
)
argparser.add_argument(
'-d',
'--download_folder',
action='store',
type=str,
help='The path to a folder where the e-paper should be stored',
)
argparser.add_argument(
'-l',
'--use_lock_file',
'--limit-requests',
action='store_true',
default=None
default=None,
help='Only query website for available newspaper if tomorrow\'s newspaper has not already been downloaded',
)
argparser.add_argument(
'--log_level',
action='store',
choices=['notset', 'debug', 'info', 'warning', 'error', 'critical'],
help='Set the log level',
)
return argparser.parse_args()