Externalitzades les queries

Afegits més paràmetres a .env
Millorat el mètode per imprimir en pantalla
This commit is contained in:
2024-11-14 09:39:22 +01:00
parent ceaa55b44e
commit 862c5fec07
3 changed files with 171 additions and 144 deletions
+34 -144
View File
@@ -7,6 +7,7 @@ import os
import random
import requests
import shutil
import sqlite3
import time
import zipfile
from dotenv import load_dotenv
@@ -40,15 +41,16 @@ url_prefix = {
}
# Local paths where the results are stored
destination_path = r"/home/sergio/zx/zxdb/games/"
cache_path = r"/home/sergio/zx/zxdb/cache/games/"
temp_file = r"/tmp/zxdb.download.tmp"
# NOTE(review): os.getenv returns None when a variable is unset — confirm
# that the environment/.env file defines all three before they are used.
destination_path = os.getenv('DESTINATION_PATH')
cache_path = os.getenv('CACHE_PATH')
temp_file = os.getenv('TEMP_FILE')
# Configuration parameters
should_clear_destination_path = True # Whether the destination folder is cleared first
wait = True # Whether to pause for a random time between downloads
min_wait = 2 # Minimum number of seconds to wait between downloads
max_wait = min_wait + 2 # Maximum number of seconds to wait between downloads
max_wait = 4 # Maximum number of seconds to wait between downloads
elements = []
filetypes_on_root = [
"Tape image",
@@ -57,96 +59,17 @@ filetypes_on_root = [
"POK pokes file",
] # Tipos de fichero que se guardan en la carpeta raíz del juego
def load_queries(file_path):
    """Load SQL statements from a text file.

    The file content is split on ``;``. Each fragment is stripped of
    surrounding whitespace and empty fragments are discarded.

    Args:
        file_path: Path of the SQL file to read.

    Returns:
        A list with the statements in file order, without the ``;``
        separator.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.

    Note:
        The split is purely textual, so a ``;`` inside a string literal or
        a comment also ends a statement.
    """
    # Explicit encoding: the default one depends on the platform locale.
    with open(file_path, "r", encoding="utf-8") as file:
        fragments = file.read().split(";")
    # Strip once per fragment, then drop the ones that end up empty.
    stripped = (fragment.strip() for fragment in fragments)
    return [statement for statement in stripped if statement]
# Load the queries from the SQL file; the relative path is resolved against
# the current working directory.
queries = load_queries('queries.sql')
# Listado con las consultas
def select(cursor, query_index=0):
# Lista de consultas
queries = []
# Consulta 0: Devuelve todos los juegos y sus archivos asociados
queries.append("""
SELECT DISTINCT
e.title, l.name, r.release_year, d.file_link, f.text
FROM
((((((publishers p
INNER JOIN entries e ON
p.entry_id = e.id)
INNER JOIN labels l ON
p.label_id = l.id)
INNER JOIN genretypes g ON
e.genretype_id = g.id)
INNER JOIN downloads d ON
e.id = d.entry_id)
INNER JOIN filetypes f ON
d.filetype_id = f.id)
INNER JOIN releases r ON
e.id = r.entry_id AND
p.release_seq = r.release_seq)
WHERE
(e.availabletype_id = 'A' OR e.availabletype_id = 'D') AND
(f.text <> 'Remote link' AND f.text <> '?') AND
r.release_seq = 0 AND
(g.text like '%Game:%' AND g.text not like 'Casual%')
ORDER BY
e.title;
""")
# Consulta 1: Filtra más la consulta anterior
queries.append("""
SELECT DISTINCT
e.title, l.name, r.release_year, d.file_link, f.text
FROM
((((((publishers p
INNER JOIN entries e ON
p.entry_id = e.id)
INNER JOIN labels l ON
p.label_id = l.id)
INNER JOIN genretypes g ON
e.genretype_id = g.id)
INNER JOIN downloads d ON
e.id = d.entry_id)
INNER JOIN filetypes f ON
d.filetype_id = f.id)
INNER JOIN releases r ON
e.id = r.entry_id AND
p.release_seq = r.release_seq)
WHERE
(e.availabletype_id = 'A' OR e.availabletype_id = 'D') AND
(f.text <> 'Remote link' AND f.text <> '?') AND
r.release_seq = 0 AND
l.name like 'ZOSYA%' AND
(g.text like '%Game:%' AND g.text not like 'Casual%')
ORDER BY
e.title;
""")
# Consulta 2: Devuelve juegos y solo archivos de cinta, disco o pokes
queries.append("""
SELECT DISTINCT
e.title, l.name, r.release_year, d.file_link, f.text
FROM
((((((publishers p
INNER JOIN entries e ON
p.entry_id = e.id)
INNERJOIN labels l ON
p.label_id = l.id)
INNERJOIN genretypes g ON
e.genretype_id = g.id)
INNERJOIN downloads d ON
e.id = d.entry_id)
INNERJOIN filetypes f ON
d.filetype_id = f.id)
INNERJOIN releases r ON
e.id = r.entry_id AND
p.release_seq = r.release_seq)
WHERE
(e.availabletype_id = 'A' OR e.availabletype_id = 'D') AND
(f.text IN ('Tape image','Disk image','Snapshot image','POK pokes file')) AND
r.release_seq = 0 AND
(g.text like '%Game:%' AND g.text not like 'Casual%')
ORDER BY
e.title;
""")
# Ejecutar la consulta seleccionada
cursor.execute(queries[query_index])
@@ -228,29 +151,6 @@ def url_filename(url):
return filename
# Descarga un fichero a partir de una URL
'''def download_file(url, dest):
try:
r = requests.get(url)
if r.status_code != 200:
return False
with open(dest, "wb") as f:
f.write(r.content)
return True
except requests.exceptions.Timeout:
# Maybe set up for a retry, or continue in a retry loop
print("Timeout: {}".format(url))
except requests.exceptions.TooManyRedirects:
# Tell the user their URL was bad and try a different one
print("Bad URL: {}".format(url))
except requests.exceptions.RequestException as e:
# catastrophic error. bail.
raise SystemExit(e)'''
def download_file(url, destination):
session = requests.Session()
retries = Retry(total=3, backoff_factor=1, status_forcelist=[500, 502, 503, 504])
@@ -285,6 +185,18 @@ def unzip_file(src, dst):
except Exception as e:
logging.error(f"Ocurrió un error: {e}")
def print_status(current_file, total_files, element, total_files_width=None, status="cached"):
    """Print one progress line for a file in the download process.

    The line has the form
    ``(<current> / <total>) : <status> : <file_name> (<filetype>)``.

    Args:
        current_file: Position of the file being processed.
        total_files: Total number of files to process.
        element: Mapping that provides the ``"file_name"`` and
            ``"filetype"`` keys.
        total_files_width: Minimum width of the ``current_file`` column.
            When ``None`` it is derived from the number of characters of
            ``total_files`` so the counters line up.
        status: Status label, left-aligned and padded to 10 characters.

    Raises:
        KeyError: If ``element`` lacks ``"file_name"`` or ``"filetype"``.
    """
    if total_files_width is None:
        # Callers that did not precompute the width still get aligned output.
        total_files_width = len(str(total_files))
    file_name = element["file_name"]
    filetype = element["filetype"]
    print(
        f"({current_file:{total_files_width}} / {total_files}) : "
        f"{status:<10} : {file_name} ({filetype})"
    )
# Obtiene los ficheros de la consulta desde internet o desde la caché
# y los deposita en la carpeta destino, descomprimiendo los archivos necesarios
@@ -336,15 +248,8 @@ def get_files():
unzip_file(cache_file, destination_subfolder)
else:
shutil.copyfile(cache_file, destination_file)
print(
"({:{width}} / {}) : cached : {} ({})".format(
current_file,
total_files,
element["file_name"],
element["filetype"],
width=total_files_width,
)
)
print_status(current_file, total_files, element, total_files_width, status="cached")
# El fichero no está en la cache
else:
status = "not found "
@@ -362,29 +267,14 @@ def get_files():
unzip_file(cache_file, destination_folder)
else:
shutil.copyfile(cache_file, destination_file)
print(
"({:{width}} / {}) : {} : {} ({})".format(
current_file,
total_files,
status,
element["file_name"],
element["filetype"],
width=total_files_width,
)
)
print_status(current_file, total_files, element, total_files_width, status=status)
if wait:
time.sleep(random.randint(min_wait, max_wait))
# El fichero ya existe en el destino
else:
print(
"({:{width}} / {}) : skipping : {} ({})".format(
current_file,
total_files,
element["file_name"],
element["filetype"],
width=total_files_width,
)
)
print_status(current_file, total_files, element, total_files_width, status="skipping")
except Exception as e:
logging.error(f"Error al procesar el fichero {element['file_name']}: {e}")
@@ -433,7 +323,7 @@ def print_elements(mode=0):
# Bucle principal
def main():
connect(query_index=0)
connect(query_index=1)
process_elements()
print_elements(mode=1)
clear_destination_folder()