"""Download ZX Spectrum game files referenced by a local ZXDB database.

The script queries ZXDB (MySQL) for the downloads of a game, derives a
folder layout for each file, and then fetches every file into a local cache
and copies (or unzips) it into the destination tree.
"""

import os
import random
import shutil
import time
import zipfile
from urllib.parse import urlparse
from urllib.request import urlretrieve

import mysql.connector
import requests
from mysql.connector import errorcode

url_prefix = [
    r"https://spectrumcomputing.co.uk",
    r"https://archive.org/download/World_of_Spectrum_June_2017_Mirror/World%20of%20Spectrum%20June%202017%20Mirror.zip/World%20of%20Spectrum%20June%202017%20Mirror",
]
destination_path = r"/home/sergio/zx/zxdb/games/"
cache_path = r"/home/sergio/zx/zxdb/cache/games/"
wait = False  # Enables a random pause between downloads
min_wait = 3  # Minimum number of seconds to wait between downloads
max_wait = 5  # Maximum number of seconds to wait between downloads
elements = []
filetypes_on_root = [
    "Tape image",
    "Disk image",
    "Snapshot image",
    "POK pokes file",
]  # File types that are stored directly in the game's root folder

# Scratch file (in the current working directory) used while downloading.
_TEMP_DOWNLOAD = "downloaded_file.tmp"


def select1(cursor):
    """Print the title and id of every entry whose id is in 1950..1980."""
    query = "SELECT id, title FROM entries WHERE id BETWEEN %s AND %s"
    id_start = 1950
    id_end = 1980
    cursor.execute(query, (id_start, id_end))
    # `entry_id` rather than `id` to avoid shadowing the builtin.
    for entry_id, title in cursor:
        print("{} ({})".format(title, entry_id))


def select2(cursor):
    """Append the full URL of every download with filetype_id = 1 to `elements`.

    Each row is a one-column tuple; it is unpacked directly instead of
    slicing the tuple's repr, which used to drop the link's leading "/".
    """
    query = "select file_link from downloads where filetype_id=1"
    cursor.execute(query)
    for (file_link,) in cursor:
        if file_link:
            elements.append(url_prefix[0] + file_link)


def select3(cursor, title="Afterburner"):
    """Append one dict per download of the game called `title` to `elements`.

    Only the original release (release_seq = 0) of entries whose genre text
    contains "Game" is considered. Each dict has the keys `title`,
    `developer`, `release_year`, `url` and `filetype`.
    """
    query = """
        SELECT DISTINCT e.title, l.name, r.release_year, d.file_link, f.text
        FROM ((((((publishers p
            INNER JOIN entries e ON p.entry_id = e.id)
            INNER JOIN labels l ON p.label_id = l.id)
            INNER JOIN genretypes g ON e.genretype_id = g.id)
            INNER JOIN downloads d ON e.id = d.entry_id)
            INNER JOIN filetypes f ON d.filetype_id = f.id)
            INNER JOIN releases r
                ON e.id = r.entry_id AND p.release_seq = r.release_seq)
        WHERE e.title = %s AND r.release_seq = 0 AND g.text like %s
        ORDER BY e.title;"""
    # The LIKE pattern is bound as a parameter too, so the statement contains
    # no literal "%" that could clash with the %s placeholders.
    cursor.execute(query, (title, "%Game%"))
    for row in cursor:
        elements.append(
            {
                "title": row[0],
                "developer": row[1],
                "release_year": row[2],
                "url": row[3],
                "filetype": row[4],
            }
        )


def connect():
    """Connect to the local ZXDB database and load `elements` via select3().

    Connection errors are reported on stdout and otherwise swallowed, so the
    caller continues with whatever `elements` contains.
    """
    config = {
        "user": "root",
        # NOTE(review): credentials are hard-coded in source. The password can
        # be overridden through the ZXDB_PASSWORD environment variable.
        "password": os.environ.get("ZXDB_PASSWORD", "unJEPimbJddHP8"),
        "host": "127.0.0.1",
        "database": "zxdb",
        "raise_on_warnings": True,
    }
    # Pre-bind both names so the `finally` clause is safe when connect() fails.
    connection = None
    cursor = None
    try:
        connection = mysql.connector.connect(**config)
        cursor = connection.cursor()
        select3(cursor)
    except mysql.connector.Error as err:
        if err.errno == errorcode.ER_ACCESS_DENIED_ERROR:
            print("Something is wrong with your user name or password")
        elif err.errno == errorcode.ER_BAD_DB_ERROR:
            print("Database does not exist")
        else:
            print(err)
    finally:
        # Close the cursor before the connection it belongs to.
        if cursor is not None:
            cursor.close()
        if connection is not None and connection.is_connected():
            connection.close()


def process_elements():
    """Add the derived fields get_files() needs to every dict in `elements`.

    Adds `root_folder`, `file_name`, `subfolder`, `is_zip` and
    `non_zip_file_name`, and turns the relative `url` into an absolute one.
    """
    for element in elements:
        # Build the name of the game's root folder. A "/" would silently
        # create nested directories, so it is replaced.
        root_folder = "{} ({})({})".format(
            element["title"], element["release_year"], element["developer"]
        )
        element["root_folder"] = root_folder.replace("/", "_")

        # Take the file name from the download URL.
        file_name = url_filename(element["url"])
        element["file_name"] = file_name

        # File types not kept in the root folder get their own subfolder.
        if element["filetype"] in filetypes_on_root:
            element["subfolder"] = ""
        else:
            element["subfolder"] = normalize_path(element["filetype"])

        # Record whether the file is a .zip and its name without ".zip".
        element["is_zip"] = file_name.endswith(".zip")
        element["non_zip_file_name"] = (
            file_name[:-4] if element["is_zip"] else file_name
        )

        # Prepend the server prefix. "/pub" links could alternatively be
        # served from the archive.org mirror: url_prefix[1] + url[4:].
        if element["url"].startswith(("/zxdb", "/pub")):
            element["url"] = url_prefix[0] + element["url"]


def add_prefix():
    """Turn the relative link of every element into an absolute URL.

    Works on dict elements (key "url") as well as on list rows whose link is
    at index 3. "/zxdb" links use url_prefix[0]; "/pub" links use the
    archive.org mirror in url_prefix[1], with the "/pub" part removed.
    """
    for element in elements:
        key = "url" if isinstance(element, dict) else 3
        link = element[key]
        if link.startswith("/zxdb"):
            element[key] = url_prefix[0] + link
        elif link.startswith("/pub"):
            element[key] = url_prefix[1] + link[4:]


def url_filename(url):
    """Return the last path component of `url` (query and fragment ignored)."""
    return os.path.basename(urlparse(url).path)


def download_file(url, dest, timeout=30):
    """Download `url` into the file `dest`.

    Args:
        url: Absolute URL to fetch.
        dest: Path of the file to write.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled server would block the script forever.

    Returns:
        True when the server answered 200 and `dest` was written; False on
        any other status code, on timeout, or on too many redirects.

    Raises:
        SystemExit: on any other `requests` error.
    """
    try:
        print("")
        print("URL : {}".format(url))
        r = requests.get(url, timeout=timeout)
        print("STATUS_CODE: {}".format(r.status_code))
        print("RESPONSE : {}".format(r.reason))
        if r.status_code != 200:
            return False
        with open(dest, "wb") as f:
            f.write(r.content)
        return True
    except requests.exceptions.Timeout:
        # Maybe set up for a retry, or continue in a retry loop
        print("Timeout: {}".format(url))
    except requests.exceptions.TooManyRedirects:
        # Tell the user their URL was bad and try a different one
        print("Bad URL: {}".format(url))
    except requests.exceptions.RequestException as e:
        # Catastrophic error. Bail.
        raise SystemExit(e)
    return False


def unzip_file(src, dst):
    """Extract every member of the zip archive `src` into directory `dst`."""
    with zipfile.ZipFile(src, "r") as zip_ref:
        zip_ref.extractall(dst)


def _install_from_cache(cache_file, destination_subfolder, destination_file):
    """Copy (or unzip) a cached file into its destination folder."""
    os.makedirs(destination_subfolder, exist_ok=True)
    if cache_file.endswith(".zip"):
        unzip_file(cache_file, destination_subfolder)
    else:
        shutil.copyfile(cache_file, destination_file)


def _report(current_file, total_files, status, element):
    """Print the one-line progress report for `element`."""
    print(
        "({:{width}} / {}) : {} : {} ({})".format(
            current_file,
            total_files,
            status,
            element["file_name"],
            element["filetype"],
            width=2,
        )
    )


def get_files():
    """Fetch every file in `elements` into the cache and destination trees.

    For each element: skip it when the destination file already exists,
    install it from the cache when it is cached, otherwise download it,
    store it in the cache and install it from there. Expects the fields
    added by process_elements().
    """
    total_files = len(elements)
    last_game_folder = ""
    for current_file, element in enumerate(elements, start=1):
        # Folders for this game in the destination tree and in the cache.
        game_folder = element["root_folder"]
        destination_subfolder = os.path.join(
            destination_path, game_folder, element["subfolder"]
        )
        cache_subfolder = os.path.join(
            cache_path, game_folder, element["subfolder"]
        )

        # Full paths of the destination file and of the cached file.
        destination_file = os.path.join(destination_subfolder, element["file_name"])
        cache_file = os.path.join(cache_subfolder, element["file_name"])

        # Print the game heading once per game.
        if game_folder != last_game_folder:
            print("\n{}".format(game_folder))
            last_game_folder = game_folder

        # The file already exists in the destination.
        # NOTE(review): zips are extracted, so the ".zip" itself is never in
        # the destination and zipped files are re-extracted on every run.
        if os.path.isfile(destination_file):
            _report(current_file, total_files, "skipping", element)
            continue

        # The file is in the cache: copy or extract it into the destination.
        if os.path.isfile(cache_file):
            _install_from_cache(cache_file, destination_subfolder, destination_file)
            _report(current_file, total_files, "cached", element)
            continue

        # The file is not cached: download it, cache it, then install it.
        status = "not found "
        if download_file(element["url"], _TEMP_DOWNLOAD):
            status = "downloaded"
            # Only a successful download is moved into the cache, so a stale
            # temporary file from an earlier run is never mistaken for it.
            os.makedirs(cache_subfolder, exist_ok=True)
            shutil.copyfile(_TEMP_DOWNLOAD, cache_file)
            os.remove(_TEMP_DOWNLOAD)
            _install_from_cache(cache_file, destination_subfolder, destination_file)
        _report(current_file, total_files, status, element)
        if wait:
            time.sleep(random.randint(min_wait, max_wait))


def normalize_path(path):
    """Return `path` with characters that are illegal in file names replaced by "_"."""
    illegal_chars = '<>:"/\\|?*'
    return path.translate(str.maketrans(illegal_chars, "_" * len(illegal_chars)))


def main():
    """Load the download list from ZXDB, prepare it and fetch the files."""
    connect()
    process_elements()
    get_files()


if __name__ == "__main__":
    main()