Merge branch 'dev' into main
This commit is contained in:
commit
6286798e6c
2
main.py
2
main.py
|
|
@ -49,7 +49,7 @@ try:
|
||||||
newspaper_available = taz_dl.scrape_newspaper()
|
newspaper_available = taz_dl.scrape_newspaper()
|
||||||
|
|
||||||
# Remove outdated newspaper from download_history.csv
|
# Remove outdated newspaper from download_history.csv
|
||||||
df.drop([f.index for f in df['file'] if f not in newspaper_available], inplace=True)
|
df.drop([index for index, row in df.iterrows() if row.file not in newspaper_available], inplace=True)
|
||||||
|
|
||||||
# Find newspaper which are not already downloaded
|
# Find newspaper which are not already downloaded
|
||||||
newspaper_to_download = [n for n in newspaper_available if n not in df.file.values]
|
newspaper_to_download = [n for n in newspaper_available if n not in df.file.values]
|
||||||
|
|
|
||||||
|
|
@ -41,6 +41,13 @@ class TazDownloader:
|
||||||
Downloads a newspaper from dl.taz.de and stores it in /tmp
|
Downloads a newspaper from dl.taz.de and stores it in /tmp
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# Check if folder exists
|
||||||
|
try:
|
||||||
|
if not os.path.isdir(dir_path):
|
||||||
|
os.mkdirs(dir_path)
|
||||||
|
except Exception as e:
|
||||||
|
raise TazDownloadError(f"Could find or create \"{dir_path}\":\n{e}")
|
||||||
|
|
||||||
# download taz
|
# download taz
|
||||||
try:
|
try:
|
||||||
with requests.get(
|
with requests.get(
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
pandas~=1.3.2
|
pandas~=1.1.5
|
||||||
envyaml~=1.8.210417
|
envyaml~=1.8.210417
|
||||||
requests~=2.26.0
|
requests~=2.26.0
|
||||||
beautifulsoup4~=4.9.3
|
beautifulsoup4~=4.9.3
|
||||||
Loading…
Reference in New Issue