diff --git a/README.MD b/README.MD index 46ed41a..36217e1 100644 --- a/README.MD +++ b/README.MD @@ -36,3 +36,6 @@ Exemple de fitxer .env DB_HOST=172.18.0.2 DB_PORT=3306 DB_NAME=zxdb + DESTINATION_PATH=/home/sergio/zx/zxdb/games/ + CACHE_PATH=/home/sergio/zx/zxdb/cache/games/ + TEMP_FILE=/tmp/zxdb.download.tmp diff --git a/queries.sql b/queries.sql new file mode 100644 index 0000000..319f104 --- /dev/null +++ b/queries.sql @@ -0,0 +1,134 @@ +-- Consulta 0: Devuelve todos los juegos y sus archivos asociados +SELECT DISTINCT + e.title, + l.name, + r.release_year, + d.file_link, + f.text +FROM + ( + ( + ( + ( + ( + ( + publishers p + INNER JOIN entries e ON p.entry_id = e.id + ) + INNER JOIN labels l ON p.label_id = l.id + ) + INNER JOIN genretypes g ON e.genretype_id = g.id + ) + INNER JOIN downloads d ON e.id = d.entry_id + ) + INNER JOIN filetypes f ON d.filetype_id = f.id + ) + INNER JOIN releases r ON e.id = r.entry_id + AND p.release_seq = r.release_seq + ) +WHERE + ( + e.availabletype_id = 'A' + OR e.availabletype_id = 'D' + ) + AND ( + f.text <> 'Remote link' + AND f.text <> '?' + ) + AND r.release_seq = 0 + AND ( + g.text like '%Game:%' + AND g.text not like 'Casual%' + ) +ORDER BY + e.title; + +-- Consulta 1: Filtra más la consulta anterior +SELECT DISTINCT + e.title, + l.name, + r.release_year, + d.file_link, + f.text +FROM + ( + ( + ( + ( + ( + ( + publishers p + INNER JOIN entries e ON p.entry_id = e.id + ) + INNER JOIN labels l ON p.label_id = l.id + ) + INNER JOIN genretypes g ON e.genretype_id = g.id + ) + INNER JOIN downloads d ON e.id = d.entry_id + ) + INNER JOIN filetypes f ON d.filetype_id = f.id + ) + INNER JOIN releases r ON e.id = r.entry_id + AND p.release_seq = r.release_seq + ) +WHERE + ( + e.availabletype_id = 'A' + OR e.availabletype_id = 'D' + ) + AND ( + f.text <> 'Remote link' + AND f.text <> '?' 
+ ) + AND r.release_seq = 0 + AND l.name like 'ZOSYA%' + AND ( + g.text like '%Game:%' + AND g.text not like 'Casual%' + ) +ORDER BY + e.title; + +-- Consulta 2: Devuelve juegos y solo archivos de cinta, disco o pokes +SELECT DISTINCT + e.title, + l.name, + r.release_year, + d.file_link, + f.text +FROM + ( + ( + ( + ( + ( + ( + publishers p + INNER JOIN entries e ON p.entry_id = e.id + ) INNER JOIN labels l ON p.label_id = l.id + ) INNER JOIN genretypes g ON e.genretype_id = g.id + ) INNER JOIN downloads d ON e.id = d.entry_id + ) INNER JOIN filetypes f ON d.filetype_id = f.id + ) INNER JOIN releases r ON e.id = r.entry_id + AND p.release_seq = r.release_seq + ) +WHERE + ( + e.availabletype_id = 'A' + OR e.availabletype_id = 'D' + ) + AND ( + f.text IN ( + 'Tape image', + 'Disk image', + 'Snapshot image', + 'POK pokes file' + ) + ) + AND r.release_seq = 0 + AND ( + g.text like '%Game:%' + AND g.text not like 'Casual%' + ) +ORDER BY + e.title; \ No newline at end of file diff --git a/zxdb.py b/zxdb.py index 3877e93..386d548 100644 --- a/zxdb.py +++ b/zxdb.py @@ -7,6 +7,7 @@ import os import random import requests import shutil +import sqlite3 import time import zipfile from dotenv import load_dotenv @@ -40,15 +41,16 @@ url_prefix = { } # Rutas locales donde depositar los resultados -destination_path = r"/home/sergio/zx/zxdb/games/" -cache_path = r"/home/sergio/zx/zxdb/cache/games/" -temp_file = r"/tmp/zxdb.download.tmp" +destination_path = os.getenv('DESTINATION_PATH') +cache_path = os.getenv('CACHE_PATH') +temp_file = os.getenv('TEMP_FILE') + # Parametros de configuración should_clear_destination_path = True # Establece si se limpia primero la carpeta de destino wait = True # Establece una pausa aleatoria entre descargas min_wait = 2 # Segundos mínimos a esperar entre descargas -max_wait = min_wait + 2 # Segundos máximos a esperar entre descargas +max_wait = 4 # Segundos máximos a esperar entre descargas elements = [] filetypes_on_root = [ "Tape image", @@ -57,96 
def _split_sql_statements(script):
    """Split SQL text on top-level semicolons.

    A semicolon only ends a statement when it is outside of quoted text
    ('...', "..." or `...`), outside a ``--`` line comment and outside a
    ``/* ... */`` block comment. Doubled quotes inside a literal work
    naturally (the literal closes and immediately reopens); backslash
    escapes are not interpreted.

    Args:
        script: Full text of the SQL script.

    Returns:
        List of stripped statement texts, possibly including empty strings
        for blank segments; comments are kept as part of the statement
        they belong to.
    """
    statements = []
    start = 0
    pos = 0
    end = len(script)
    while pos < end:
        char = script[pos]
        if char in "'\"`":
            # Jump to the matching closing quote (or to the end of the
            # text when the literal is unterminated).
            closing = script.find(char, pos + 1)
            pos = end if closing == -1 else closing + 1
        elif script.startswith('--', pos):
            # Line comment: ignore everything up to the end of the line.
            newline = script.find('\n', pos)
            pos = end if newline == -1 else newline + 1
        elif script.startswith('/*', pos):
            # Block comment: ignore everything up to the closing marker.
            closing = script.find('*/', pos + 2)
            pos = end if closing == -1 else closing + 2
        elif char == ';':
            statements.append(script[start:pos].strip())
            start = pos + 1
            pos = start
        else:
            pos += 1
    # Whatever follows the last semicolon is a statement of its own.
    statements.append(script[start:].strip())
    return statements


def load_queries(file_path):
    """Load the SQL statements stored in a file.

    The file is always decoded as UTF-8 so that accented characters in
    comments or literals do not depend on the platform's default encoding.
    Statements are separated by semicolons; semicolons that appear inside
    string literals or comments do not split a statement.

    Args:
        file_path: Path of the SQL file to read.

    Returns:
        List of statement texts in file order, stripped of surrounding
        whitespace, without the terminating semicolon and without empty
        entries.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        script = file.read()
    return [statement for statement in _split_sql_statements(script) if statement]
Devuelve juegos y solo archivos de cinta, disco o pokes - queries.append(""" - SELECT DISTINCT - e.title, l.name, r.release_year, d.file_link, f.text - FROM - ((((((publishers p - INNER JOIN entries e ON - p.entry_id = e.id) - INNERJOIN labels l ON - p.label_id = l.id) - INNERJOIN genretypes g ON - e.genretype_id = g.id) - INNERJOIN downloads d ON - e.id = d.entry_id) - INNERJOIN filetypes f ON - d.filetype_id = f.id) - INNERJOIN releases r ON - e.id = r.entry_id AND - p.release_seq = r.release_seq) - WHERE - (e.availabletype_id = 'A' OR e.availabletype_id = 'D') AND - (f.text IN ('Tape image','Disk image','Snapshot image','POK pokes file')) AND - r.release_seq = 0 AND - (g.text like '%Game:%' AND g.text not like 'Casual%') - ORDER BY - e.title; - """) - # Ejecutar la consulta seleccionada cursor.execute(queries[query_index]) @@ -228,29 +151,6 @@ def url_filename(url): return filename # Descarga un fichero a partir de una URL -'''def download_file(url, dest): - try: - r = requests.get(url) - if r.status_code != 200: - return False - with open(dest, "wb") as f: - f.write(r.content) - return True - - except requests.exceptions.Timeout: - # Maybe set up for a retry, or continue in a retry loop - print("Timeout: {}".format(url)) - - except requests.exceptions.TooManyRedirects: - # Tell the user their URL was bad and try a different one - print("Bad URL: {}".format(url)) - - except requests.exceptions.RequestException as e: - # catastrophic error. bail. 
# Prints the status of a file within the download process
def print_status(current_file, total_files, element, total_files_width, status="cached"):
    """Print one progress line for a file being processed.

    The line has the form ``(<current> / <total>) : <status> : <name> (<type>)``.
    The current index is padded to ``total_files_width`` and the status is
    left-aligned in a 10-character column.

    Args:
        current_file: Index of the file currently being handled.
        total_files: Total number of files to handle.
        element: Mapping that provides the "file_name" and "filetype" keys.
        total_files_width: Field width used for ``current_file``.
        status: Short status label shown for the file.
    """
    name = element["file_name"]
    kind = element["filetype"]
    progress = f"({current_file:{total_files_width}} / {total_files})"
    print(f"{progress} : {status:<10} : {name} ({kind})")
Exception as e: logging.error(f"Error al procesar el fichero {element['file_name']}: {e}") @@ -433,7 +323,7 @@ def print_elements(mode=0): # Bucle principal def main(): - connect(query_index=0) + connect(query_index=1) process_elements() print_elements(mode=1) clear_destination_folder()