Merge branch 'dev' into main
This commit is contained in:
commit
6286798e6c
2
main.py
2
main.py
|
|
@ -49,7 +49,7 @@ try:
|
|||
newspaper_available = taz_dl.scrape_newspaper()
|
||||
|
||||
# Remove outdated newspapers from download_history.csv
|
||||
df.drop([f.index for f in df['file'] if f not in newspaper_available], inplace=True)
|
||||
df.drop([index for index, row in df.iterrows() if row.file not in newspaper_available], inplace=True)
|
||||
|
||||
# Find newspapers which have not been downloaded yet
|
||||
newspaper_to_download = [n for n in newspaper_available if n not in df.file.values]
|
||||
|
|
|
|||
|
|
@ -41,6 +41,13 @@ class TazDownloader:
|
|||
Downloads a newspaper from dl.taz.de and stores it in /tmp
|
||||
"""
|
||||
|
||||
# Check if folder exists
|
||||
try:
|
||||
if not os.path.isdir(dir_path):
|
||||
os.mkdirs(dir_path)
|
||||
except Exception as e:
|
||||
raise TazDownloadError(f"Could find or create \"{dir_path}\":\n{e}")
|
||||
|
||||
# download taz
|
||||
try:
|
||||
with requests.get(
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
pandas~=1.3.2
|
||||
pandas~=1.1.5
|
||||
envyaml~=1.8.210417
|
||||
requests~=2.26.0
|
||||
beautifulsoup4~=4.9.3
|
||||
Loading…
Reference in New Issue