diff --git a/zxdb.py b/zxdb.py index 240af62..dfd0cc7 100644 --- a/zxdb.py +++ b/zxdb.py @@ -1,5 +1,6 @@ -## Script para descargar pantallas de carga de spectrum a partir de zxdb +## Script para descargar ficheros de spectrum a partir de zxdb +## Imports utilizados en el script import os import mysql.connector import requests @@ -11,15 +12,20 @@ from mysql.connector import errorcode from urllib.parse import urlparse from urllib.request import urlretrieve - +## Direcciones de internet de donde descargar los datos url_prefix = { "spectrum_computing": r"https://spectrumcomputing.co.uk", "wos": r"https://php.sustancia.synology.me/wos", "nvg": r"https://php.sustancia.synology.me/nvg", } + +## Rutas locales donde depositar los resultados destination_path = r"/home/sergio/zx/zxdb/games/" cache_path = r"/home/sergio/zx/zxdb/cache/games/" temp_file = r"/tmp/zxdb.download.tmp" + +## Parametros de configuración +should_clear_destination_path = True # Establece si se limpia primero la carpeta de destino wait = True # Establece una pausa aleatoria entre descargas min_wait = 2 # Segundos mínimos a esperar entre descargas max_wait = min_wait + 1 # Segundos máximos a esperar entre descargas @@ -32,24 +38,42 @@ filetypes_on_root = [ ] # Tipos de fichero que se guardan en la carpeta raíz del juego -def select1(cursor): - query = "SELECT id, title FROM entries WHERE id BETWEEN %s AND %s" - id_start = 1950 - id_end = 1980 - cursor.execute(query, (id_start, id_end)) - for id, title in cursor: - print("{} ({})".format(title, id)) +def select(cursor): + query = [] + selected_query = 0 - -def select2(cursor): - query = "select file_link from downloads where filetype_id=1" - cursor.execute(query) - for file_link in cursor: - elements.append(url_prefix[0] + str(file_link)[3:-3]) - - -def select3(cursor): - query = """ + ## Esta consulta devuelve todos los juegos, filtrando aplicaciones, libros, etc y todos los ficheros asociados a esos juegos + ## 0 + select = """ + SELECT DISTINCT + 
e.title, l.name, r.release_year, d.file_link, f.text + FROM + ((((((publishers p + INNER JOIN entries e ON + p.entry_id = e.id) + INNER JOIN labels l ON + p.label_id = l.id) + INNER JOIN genretypes g ON + e.genretype_id = g.id) + INNER JOIN downloads d ON + e.id = d.entry_id) + INNER JOIN filetypes f ON + d.filetype_id = f.id) + INNER JOIN releases r ON + e.id = r.entry_id AND + p.release_seq = r.release_seq) + WHERE + (e.availabletype_id = 'A' OR e.availabletype_id = 'D') AND + (f.text <> 'Remote link' AND f.text <> '?') AND + r.release_seq = 0 AND + (g.text like '%Game:%' AND g.text not like 'Casual%') + ORDER BY + e.title;""" + query.append(select) + + ## Esta consulta se usa para filtrar mas la consulta anterior + ## 1 + select = """ SELECT DISTINCT e.title, l.name, r.release_year, d.file_link, f.text FROM @@ -80,7 +104,39 @@ def select3(cursor): #(l.country_id = 'ES' AND l.labeltype_id = 'Z') AND #l.name in ('Ocean Software Ltd', 'Imagine Software Ltd', 'Palace Software', 'Gremlin Graphics Software Ltd', 'Elite Systems Ltd', 'Melbourne House', 'Ultimate Play The Game', 'Durell Software Ltd', 'Codemasters Ltd') AND #e.title = 'Arkanoid - Revenge of Doh' AND - cursor.execute(query) + query.append(select) + + ## Esta consulta devuelve todos los juegos, filtrando aplicaciones, libros, etc y SOLO los ficheros de cinta, disco o pokes + ## 2 + select = """ + SELECT DISTINCT + e.title, l.name, r.release_year, d.file_link, f.text + FROM + ((((((publishers p + INNER JOIN entries e ON + p.entry_id = e.id) + INNER JOIN labels l ON + p.label_id = l.id) + INNER JOIN genretypes g ON + e.genretype_id = g.id) + INNER JOIN downloads d ON + e.id = d.entry_id) + INNER JOIN filetypes f ON + d.filetype_id = f.id) + INNER JOIN releases r ON + e.id = r.entry_id AND + p.release_seq = r.release_seq) + WHERE + (e.availabletype_id = 'A' OR e.availabletype_id = 'D') AND + (f.text IN ('Tape image','Disk image','Snapshot image','POK pokes file')) AND + r.release_seq = 0 AND + (g.text like 
'%Game:%' AND g.text not like 'Casual%') + ORDER BY + e.title;""" + query.append(select) + + cursor.execute(query[selected_query]) + for row in cursor: element = dict( title=row[0], @@ -91,7 +147,7 @@ def select3(cursor): ) elements.append(element) - +## Establece la conexión a la BBDD y ejecuta la consulta def connect(): config = { "user": "root", @@ -104,7 +160,7 @@ def connect(): try: connection = mysql.connector.connect(**config) cursor = connection.cursor() - select3(cursor) + select(cursor) except mysql.connector.Error as err: if err.errno == errorcode.ER_ACCESS_DENIED_ERROR: @@ -119,7 +175,7 @@ def connect(): connection.close() cursor.close() - +## Procesa todos los elementos, modificando cada uno de sus parámetros def process_elements(): global elements for i in range(len(elements)): @@ -161,6 +217,7 @@ def process_elements(): elements[i]["url"] = url_prefix["nvg"] + str(elements[i]["url"][4:]) +## Devuelve el fichero que forma la parte final de una URL def url_filename(url): parsed_url = urlparse(url) path = parsed_url.path @@ -168,6 +225,7 @@ def url_filename(url): return filename +## Descarga un fichero a partir de una URL def download_file(url, dest): try: r = requests.get(url) @@ -190,6 +248,7 @@ def download_file(url, dest): raise SystemExit(e) +## Descomprime los ficheros que coinciden con la lista de extensiones def unzip_file(src, dst): # with zipfile.ZipFile(src, "r") as zip_ref: # zip_ref.extractall(dst) @@ -205,6 +264,8 @@ def unzip_file(src, dst): zip_file.close() +## Obtiene los ficheros de la consulta desde internet o desde la caché +## y los deposita en la carpeta destino, descomprimiendo los archivos necesarios def get_files(): # Variables para la presentación en pantalla de la descarga current_file = 0 @@ -315,6 +376,7 @@ def get_files(): ) +## Elimina los caracteres ilegales de la cadena de texto def normalize_path(path): illegal_chars = ["<", ">", ":", '"', "/", "\\", "|", "?", "*"] replace_with = "_" @@ -323,6 +385,23 @@ def
## Clears the destination folder
def clear_destination_folder(folder=None, enabled=None):
    """Remove every entry inside the destination folder, keeping the folder itself.

    Regular files and symbolic links are unlinked; directories are removed
    recursively. The operation is best-effort: an entry that cannot be
    deleted is reported on stdout and the remaining entries are still
    processed.

    Args:
        folder: Directory to empty. When ``None`` (the default) the
            module-level ``destination_path`` is used.
        enabled: Whether to clear at all. When ``None`` (the default) the
            module-level ``should_clear_destination_path`` flag decides.

    Returns:
        None.
    """
    # Imported locally so the function does not depend on ``shutil`` being
    # present in the module-level import list.
    import shutil

    # The globals are only consulted when the caller did not override them.
    if enabled is None:
        enabled = should_clear_destination_path
    if not enabled:
        return
    if folder is None:
        folder = destination_path

    print("Clear destination folder ...")
    try:
        entries = os.listdir(folder)
    except FileNotFoundError:
        # A folder that does not exist yet has nothing to clear; treating
        # this as a no-op avoids crashing before any download has run.
        return

    for filename in entries:
        file_path = os.path.join(folder, filename)
        try:
            # islink() is checked alongside isfile() so that a symlink is
            # unlinked instead of being handed to rmtree().
            if os.path.isfile(file_path) or os.path.islink(file_path):
                os.unlink(file_path)
            elif os.path.isdir(file_path):
                shutil.rmtree(file_path)
        except OSError as e:
            # Best effort: report the failure and keep going.
            print("Failed to delete %s. Reason: %s" % (file_path, e))