✨️ use download_history.csv instead of lock file to limit queries
This commit is contained in:
parent
6e8d9d9ef8
commit
2af37f96a4
|
|
@ -11,9 +11,10 @@ download_format: "pdf"
|
||||||
# Where should the downloaded files be stored?
|
# Where should the downloaded files be stored?
|
||||||
download_folder: "/path/to/download/folder"
|
download_folder: "/path/to/download/folder"
|
||||||
|
|
||||||
# Use a lock file that indicates whether tomorrow's newspaper has already been downloaded to limit the number of times
|
# Before the program searches for new download candidates, it checks whether tomorrow's newspaper has already been
|
||||||
# the taz.de website is queried for new editions
|
# downloaded, in order to limit the number of queries on the taz.de website.
|
||||||
use_lock_file: True
|
# If you want to download all available newspapers that are missing from download_history.csv, set this value to False.
|
||||||
|
limit_requests: True
|
||||||
|
|
||||||
# Set the log level.
|
# Set the log level.
|
||||||
# Valid formats are: notset, debug, info, warning, error, critical
|
# Valid formats are: notset, debug, info, warning, error, critical
|
||||||
|
|
|
||||||
38
main.py
38
main.py
|
|
@ -23,22 +23,6 @@ def main(config: dict):
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
logging.error(f"Could not set log level.\n{e}", exc_info=True)
|
logging.error(f"Could not set log level.\n{e}", exc_info=True)
|
||||||
|
|
||||||
# If 'use_lock_file' configuration is set, check if lockfile exists for tomorrow's newspaper
|
|
||||||
if config['use_lock_file']:
|
|
||||||
try:
|
|
||||||
lock_files = [entry for entry in os.listdir(dir_path) if os.path.isfile(entry) and entry.endswith('.lock')]
|
|
||||||
# Delete all lock files that do not refer to tomorrow's date
|
|
||||||
for file in lock_files:
|
|
||||||
if not file.startswith('.' + tomorrow):
|
|
||||||
os.remove(dir_path + file)
|
|
||||||
# If there is a lock file for tomorrow, exit the program
|
|
||||||
for file in lock_files:
|
|
||||||
if file.startswith('.' + tomorrow):
|
|
||||||
logging.info('Tomorrow\'s newspaper was already downloaded. Execution canceled.')
|
|
||||||
sys.exit(0)
|
|
||||||
except Exception as e:
|
|
||||||
logging.error(f"Could not check for lock files.\n{e}", exc_info=True)
|
|
||||||
|
|
||||||
# Read download history from csv file
|
# Read download history from csv file
|
||||||
try:
|
try:
|
||||||
df = pd.read_csv(dir_path + 'download_history.csv', header=0)
|
df = pd.read_csv(dir_path + 'download_history.csv', header=0)
|
||||||
|
|
@ -51,6 +35,16 @@ def main(config: dict):
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# If the 'limit_requests' argument is specified, check whether tomorrow's newspaper has already been downloaded
|
||||||
|
if config['limit_requests']:
|
||||||
|
try:
|
||||||
|
if any(df.file.str.contains(pat=tomorrow)):
|
||||||
|
logging.info('Tomorrow\'s newspaper was already downloaded. Execution canceled.')
|
||||||
|
sys.exit(0)
|
||||||
|
except Exception as e:
|
||||||
|
logging.error(f"Could not check whether tomorrow's newspaper has already been downloaded.\n{e}",
|
||||||
|
exc_info=True)
|
||||||
|
|
||||||
# Instantiate downloader object
|
# Instantiate downloader object
|
||||||
try:
|
try:
|
||||||
taz_dl = TazDownloader(config['id'], config['password'], config['download_format'])
|
taz_dl = TazDownloader(config['id'], config['password'], config['download_format'])
|
||||||
|
|
@ -59,7 +53,7 @@ def main(config: dict):
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Get newspapers available for download
|
# Get newspaper available for download
|
||||||
newspaper_available = taz_dl.scrape_newspaper()
|
newspaper_available = taz_dl.scrape_newspaper()
|
||||||
|
|
||||||
# Remove outdated newspaper from download_history.csv
|
# Remove outdated newspaper from download_history.csv
|
||||||
|
|
@ -80,16 +74,6 @@ def main(config: dict):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(f"Could not download {n}\n{e}", exc_info=True)
|
logging.error(f"Could not download {n}\n{e}", exc_info=True)
|
||||||
|
|
||||||
# Create lock file for tomorrow
|
|
||||||
if config['use_lock_file']:
|
|
||||||
try:
|
|
||||||
lock_file = '.' + tomorrow + '.lock'
|
|
||||||
for n in newspaper_downloaded:
|
|
||||||
if n.startswith('taz_' + tomorrow):
|
|
||||||
os.mknod(dir_path + lock_file)
|
|
||||||
except Exception as e:
|
|
||||||
logging.error(f"Could not download create lock file \"{lock_file}\"\n{e}", exc_info=True)
|
|
||||||
|
|
||||||
# Add downloaded newspaper to download_history.csv
|
# Add downloaded newspaper to download_history.csv
|
||||||
try:
|
try:
|
||||||
for n in newspaper_downloaded:
|
for n in newspaper_downloaded:
|
||||||
|
|
|
||||||
|
|
@ -25,7 +25,7 @@ class TazConfiguration:
|
||||||
('password', True),
|
('password', True),
|
||||||
('download_format', False),
|
('download_format', False),
|
||||||
('download_folder', True),
|
('download_folder', True),
|
||||||
('use_lock_file', False),
|
('limit_requests', False),
|
||||||
('log_level', False),
|
('log_level', False),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
@ -94,7 +94,7 @@ class TazConfiguration:
|
||||||
)
|
)
|
||||||
argparser.add_argument(
|
argparser.add_argument(
|
||||||
'-l',
|
'-l',
|
||||||
'--use_lock_file',
|
'--limit-requests',
|
||||||
action='store_true',
|
action='store_true',
|
||||||
default=None
|
default=None
|
||||||
)
|
)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue