Merge branch 'dev' into main

This commit is contained in:
Marc Koch 2021-09-02 16:27:47 +02:00
commit 6286798e6c
3 changed files with 9 additions and 2 deletions

View File

@ -49,7 +49,7 @@ try:
newspaper_available = taz_dl.scrape_newspaper()
# Remove newspapers that are no longer available from download_history.csv
df.drop([f.index for f in df['file'] if f not in newspaper_available], inplace=True)
df.drop([index for index, row in df.iterrows() if row.file not in newspaper_available], inplace=True)
# Find newspapers that have not been downloaded yet
newspaper_to_download = [n for n in newspaper_available if n not in df.file.values]

View File

@ -41,6 +41,13 @@ class TazDownloader:
Downloads a newspaper from dl.taz.de and stores it in /tmp
"""
# Make sure the target folder exists before downloading; create it
# (including any missing parent folders) when it does not.
try:
    # exist_ok=True makes this a no-op when the folder is already present,
    # so no separate isdir() check is needed.
    os.makedirs(dir_path, exist_ok=True)
except Exception as e:
    # Surface any failure as the downloader's own error type, keeping the
    # underlying exception attached as the cause.
    raise TazDownloadError(f"Could not find or create \"{dir_path}\":\n{e}") from e
# download taz
try:
with requests.get(

View File

@ -1,4 +1,4 @@
pandas~=1.3.2
pandas~=1.1.5
envyaml~=1.8.210417
requests~=2.26.0
beautifulsoup4~=4.9.3