Merge branch 'dev' into main
This commit is contained in:
commit
46b049b1f7
|
|
@ -11,9 +11,10 @@ download_format: "pdf"
|
|||
# Where should the downloaded files be stored?
|
||||
download_folder: "/path/to/download/folder"
|
||||
|
||||
# Use a lock file that indicates whether tomorrow's newspaper has already been downloaded to limit the number of times
|
||||
# the taz.de website is queried for new editions
|
||||
use_lock_file: True
|
||||
# Before the program searches for new download candidates, it checks whether tomorrow's newspaper has already been
|
||||
# downloaded, in order to limit the number of queries sent to the taz.de website.
|
||||
# If you want to download all available newspapers missing from download_history.csv, set this value to False.
|
||||
limit_requests: True
|
||||
|
||||
# Set the log level.
|
||||
# Valid formats are: notset, debug, info, warning, error, critical
|
||||
|
|
|
|||
38
main.py
38
main.py
|
|
@ -23,22 +23,6 @@ def main(config: dict):
|
|||
except ValueError as e:
|
||||
logging.error(f"Could not set log level.\n{e}", exc_info=True)
|
||||
|
||||
# If 'use_lock_file' configuration is set, check if lockfile exists for tomorrow's newspaper
|
||||
if config['use_lock_file']:
|
||||
try:
|
||||
lock_files = [entry for entry in os.listdir(dir_path) if os.path.isfile(entry) and entry.endswith('.lock')]
|
||||
# Delete all lock files that do not refer to tomorrow's date
|
||||
for file in lock_files:
|
||||
if not file.startswith('.' + tomorrow):
|
||||
os.remove(dir_path + file)
|
||||
# If there is a lock file for tomorrow, exit the program
|
||||
for file in lock_files:
|
||||
if file.startswith('.' + tomorrow):
|
||||
logging.info('Tomorrow\'s newspaper was already downloaded. Execution canceled.')
|
||||
sys.exit(0)
|
||||
except Exception as e:
|
||||
logging.error(f"Could not check for lock files.\n{e}", exc_info=True)
|
||||
|
||||
# Read download history from csv file
|
||||
try:
|
||||
df = pd.read_csv(dir_path + 'download_history.csv', header=0)
|
||||
|
|
@ -51,6 +35,16 @@ def main(config: dict):
|
|||
]
|
||||
)
|
||||
|
||||
# If the 'limit_requests' argument is specified, check whether tomorrow's newspaper has already been downloaded
|
||||
if config['limit_requests']:
|
||||
try:
|
||||
if any(df.file.str.contains(pat=tomorrow)):
|
||||
logging.info('Tomorrow\'s newspaper was already downloaded. Execution canceled.')
|
||||
sys.exit(0)
|
||||
except Exception as e:
|
||||
logging.error(f"Could not check whether tomorrow's newspaper has already been downloaded.\n{e}",
|
||||
exc_info=True)
|
||||
|
||||
# Instantiate downloader object
|
||||
try:
|
||||
taz_dl = TazDownloader(config['id'], config['password'], config['download_format'])
|
||||
|
|
@ -59,7 +53,7 @@ def main(config: dict):
|
|||
sys.exit(1)
|
||||
|
||||
try:
|
||||
# Get newspapers available for download
|
||||
# Get newspaper available for download
|
||||
newspaper_available = taz_dl.scrape_newspaper()
|
||||
|
||||
# Remove outdated newspaper from download_history.csv
|
||||
|
|
@ -80,16 +74,6 @@ def main(config: dict):
|
|||
except Exception as e:
|
||||
logging.error(f"Could not download {n}\n{e}", exc_info=True)
|
||||
|
||||
# Create lock file for tomorrow
|
||||
if config['use_lock_file']:
|
||||
try:
|
||||
lock_file = '.' + tomorrow + '.lock'
|
||||
for n in newspaper_downloaded:
|
||||
if n.startswith('taz_' + tomorrow):
|
||||
os.mknod(dir_path + lock_file)
|
||||
except Exception as e:
|
||||
logging.error(f"Could not download create lock file \"{lock_file}\"\n{e}", exc_info=True)
|
||||
|
||||
# Add downloaded newspaper to download_history.csv
|
||||
try:
|
||||
for n in newspaper_downloaded:
|
||||
|
|
|
|||
14
models.py
14
models.py
|
|
@ -25,7 +25,7 @@ class TazConfiguration:
|
|||
('password', True),
|
||||
('download_format', False),
|
||||
('download_folder', True),
|
||||
('use_lock_file', False),
|
||||
('limit_requests', False),
|
||||
('log_level', False),
|
||||
]
|
||||
|
||||
|
|
@ -65,19 +65,21 @@ class TazConfiguration:
|
|||
Parse command line arguments.
|
||||
"""
|
||||
argparser = argparse.ArgumentParser(
|
||||
description='Download taz e-paper'
|
||||
description='Download taz e-paper',
|
||||
)
|
||||
argparser.add_argument(
|
||||
'-i',
|
||||
'--id',
|
||||
action='store',
|
||||
type=str,
|
||||
help='Your taz-ID',
|
||||
)
|
||||
argparser.add_argument(
|
||||
'-p',
|
||||
'--password',
|
||||
action='store',
|
||||
type=str,
|
||||
help='Your password',
|
||||
)
|
||||
argparser.add_argument(
|
||||
'-f',
|
||||
|
|
@ -85,23 +87,27 @@ class TazConfiguration:
|
|||
action='store',
|
||||
type=str,
|
||||
choices=['pdf', 'epub', 'epubt', 'html', 'ascii', 'mobi', 'mobit'],
|
||||
help='The e-paper format',
|
||||
)
|
||||
argparser.add_argument(
|
||||
'-d',
|
||||
'--download_folder',
|
||||
action='store',
|
||||
type=str,
|
||||
help='The path to a folder where the e-paper should be stored',
|
||||
)
|
||||
argparser.add_argument(
|
||||
'-l',
|
||||
'--use_lock_file',
|
||||
'--limit-requests',
|
||||
action='store_true',
|
||||
default=None
|
||||
default=None,
|
||||
help='Only query website for available newspaper if tomorrow\'s newspaper has not already been downloaded',
|
||||
)
|
||||
argparser.add_argument(
|
||||
'--log_level',
|
||||
action='store',
|
||||
choices=['notset', 'debug', 'info', 'warning', 'error', 'critical'],
|
||||
help='Set the log level',
|
||||
)
|
||||
return argparser.parse_args()
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue