import datetime
import hashlib
import os
import pathlib
import shutil
import time
from typing import Dict, List, Optional, Set, Union

from fastapi import File, UploadFile
from pydantic import BaseModel, EmailStr, Field, PrivateAttr, ValidationError, validator

from app.config import settings
from app.db_sql import redis_lookup_id_random, sql_delete, sql_enable_part, sql_insert, sql_limit_offset_part, sql_select, sql_update
from app.lib_general import log, logging, logger_reset
from app.models.hosted_file_models import Hosted_File_Base


def _log_exception_details(e: Exception) -> None:
    """Emit the exception banner used by save_file().

    Must be called from inside an ``except`` block so that ``log.exception``
    can attach the active traceback.
    """
    log.exception('*** An exception happened. ***')
    log.exception(repr(e))
    log.exception('***')
    log.exception(str(e))
    log.exception('^^^ exception ^^^')


# ### BEGIN ### API Hosted File Methods ### create_hosted_file_obj() ###
@logger_reset
def create_hosted_file_obj(hosted_file_obj_new: Hosted_File_Base):
    """Insert a new ``hosted_file`` record built from *hosted_file_obj_new*.

    Only explicitly-set fields are written; the bookkeeping fields produced by
    save_file() ('saved', 'already_exists', 'copy_timer') and the timestamp
    fields are excluded from the insert.

    Returns:
        Whatever truthy value ``sql_insert`` returns (logged as the new
        hosted_file_id), or ``False`` when the insert result is falsy.
    """
    log.setLevel(logging.INFO)  # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL
    log.debug(locals())

    hosted_file_obj_data = hosted_file_obj_new.dict(
        by_alias=False,
        exclude_defaults=False,
        exclude_unset=True,
        exclude={'saved', 'already_exists', 'copy_timer', 'created_on', 'updated_on'},
    )

    hosted_file_obj_in_result = sql_insert(
        data=hosted_file_obj_data,
        table_name='hosted_file',
        rm_id_random=True,
        id_random_length=8,
    )
    if not hosted_file_obj_in_result:
        return False
    log.debug(hosted_file_obj_in_result)

    hosted_file_id = hosted_file_obj_in_result
    log.debug(f'Returning the new hosted_file_id: {hosted_file_id}')
    return hosted_file_id
# ### END ### API Hosted File Methods ### create_hosted_file_obj() ###


# ### BEGIN ### API Hosted File Methods ### load_hosted_file_obj() ###
@logger_reset
def load_hosted_file_obj(
        hosted_file_id: int|str,
        limit: int = 1000,
        by_alias: bool = True,
        exclude_unset: bool = True,
        model_as_dict: bool = False,
        enabled: str = 'enabled',  # enabled, disabled, all
        inc_hosted_file_link_list: bool = False,
    ) -> Hosted_File_Base|dict|bool|None:
    """Load one hosted file from the ``v_hosted_file`` view.

    Args:
        hosted_file_id: Identifier passed through ``redis_lookup_id_random``
            before the select.
        by_alias, exclude_unset: Forwarded to ``.dict()`` when
            *model_as_dict* is true.
        model_as_dict: Return a dict instead of the model instance.
        limit, enabled, inc_hosted_file_link_list: Accepted for interface
            compatibility; not used by the current implementation.

    Returns:
        The ``Hosted_File_Base`` instance (or its dict form), ``None`` when
        the select returned ``None``, or ``False`` when the ID lookup fails,
        the select returns another falsy value, or model validation fails.
    """
    log.setLevel(logging.WARNING)  # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL
    log.debug(locals())

    hosted_file_id = redis_lookup_id_random(record_id_random=hosted_file_id, table_name='hosted_file')
    if not hosted_file_id:
        return False

    hosted_file_rec = sql_select(table_name='v_hosted_file', record_id=hosted_file_id)
    if hosted_file_rec is None:
        return None
    if not hosted_file_rec:
        return False
    log.debug(hosted_file_rec)

    try:
        hosted_file_obj = Hosted_File_Base(**hosted_file_rec)
    except ValidationError as e:
        log.error(e.json())
        return False
    log.debug(hosted_file_obj)

    if model_as_dict:
        return hosted_file_obj.dict(by_alias=by_alias, exclude_unset=exclude_unset)  # pylint: disable=no-member
    return hosted_file_obj
# ### END ### API Hosted File Methods ### load_hosted_file_obj() ###


# ### BEGIN ### API Hosted File Methods ### lookup_file_hash() ###
# Updated 2022-08-09
@logger_reset
def lookup_file_hash(
        file_hash: str,
    ) -> Hosted_File_Base|dict|bool:
    """Find the ``hosted_file`` record whose ``hash_sha256`` equals *file_hash*.

    Returns:
        The record's ``hosted_file_id`` when found, ``None`` when the select
        returned ``None`` (not found), or ``False`` for any other falsy result.
        NOTE(review): the declared return annotation does not describe these
        values; it is left untouched for interface stability.
    """
    log.setLevel(logging.INFO)  # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL
    log.debug(locals())

    sql = """
        SELECT id AS 'hosted_file_id', id_random AS 'hosted_file_id_random'
        FROM hosted_file
        WHERE hosted_file.hash_sha256 = :hash_sha256
    """
    log.debug(sql)

    hosted_file_data = {'hash_sha256': file_hash}
    log.debug(hosted_file_data)

    hosted_file_select_result = sql_select(sql=sql, data=hosted_file_data)
    if hosted_file_select_result:
        hosted_file_id = hosted_file_select_result.get('hosted_file_id')
        log.info(f'Selected Hosted File record. Hosted File ID: {hosted_file_id}')
        return hosted_file_id
    if hosted_file_select_result is None:
        log.warning(f'Hosted File record was not found. SHA 256 Hash: {file_hash}')
        return None
    log.error(f'Something went wrong while trying to select the hosted file record. SHA 256 Hash: {file_hash}')
    return False
# ### END ### API Hosted File Methods ### lookup_file_hash() ###


# ### BEGIN ### API Hosted File Route ### get_file_object_hash() ###
@logger_reset
async def get_file_object_hash(file_object: File):
    """Return the SHA-256 hex digest of the whole content of *file_object*.

    *file_object* is read synchronously via ``read()``/``seek()`` (save_file()
    passes ``UploadFile.file``). The stream is rewound before hashing so the
    digest always covers the full content, and rewound again afterwards so
    the caller can still save the file.
    """
    log.debug(locals())

    # 4096 bytes is the current block size on my workstation and Linode server
    # 4096 8192 16384 32768 65536 131072 262144 524288 1048576 bytes
    block_size = 131072

    hash_value = hashlib.sha256()
    timer_start = time.process_time()
    file_object.seek(0)  # Hash the complete file, not just what follows the current position.
    while chunk := file_object.read(block_size):
        hash_value.update(chunk)
    file_hash = hash_value.hexdigest()
    file_object.seek(0)  # The file will not properly save if seek is not reset to 0.
    timer_end = time.process_time()

    elapsed_time = timer_end - timer_start
    log.debug(f'Elapsed time: {elapsed_time}')
    return file_hash
# ### END ### API Hosted File Route ### get_file_object_hash() ###


# ### BEGIN ### API Hosted File Route ### guess_file_extension() ###
def guess_file_extension(filename: str):
    """Return the lower-cased text after the last '.' in *filename*.

    Returns an empty string when *filename* is empty/None or contains no dot
    (previously this raised IndexError / AttributeError).
    """
    if not filename:
        return ''
    _, dot, extension = filename.rpartition('.')
    return extension.lower() if dot else ''
# ### END ### API Hosted File Route ### guess_file_extension() ###


# ### BEGIN ### API Hosted File Route ### allowed_file_extension() ###
def allowed_file_extension(extension: str, extension_list: list):
    """Return True when the lower-cased *extension* is a member of *extension_list*."""
    return extension.lower() in extension_list  # app.config['ALLOWED_EXTENSIONS']
# ### END ### API Hosted File Route ### allowed_file_extension() ###


# ### BEGIN ### API Hosted File Route ### save_file() ###
# Updated 2022-08-09
@logger_reset
async def save_file(
        file: UploadFile,
        account_id: int,
        account_id_random: str,
        link_to_type: str,
        link_to_id: int,
        link_to_id_random: str,
        check_allowed_extension: bool = False,
    ):
    """Store an uploaded file under the hosted-files root, keyed by its SHA-256.

    The file is written to ``<root>/<first two hash chars>/<hash>.file``. If a
    copy already exists directly under the root (without the subdirectory) it
    is moved into the subdirectory instead; if it already exists in the
    subdirectory nothing is written.

    Args:
        file: The upload to store.
        account_id, account_id_random: Accepted for interface compatibility;
            not used by the current implementation.
        link_to_type, link_to_id, link_to_id_random: Copied into the result.
        check_allowed_extension: When true, only 'jpg', 'png' and 'webp' are
            accepted; other extensions return early with ``saved = False``.

    Returns:
        A ``file_info`` dict describing the file and the outcome (see the
        keys assigned below), or ``False`` when copying a new file to the
        destination raised an exception.
    """
    log.setLevel(logging.INFO)  # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL
    log.debug(locals())

    hosted_files_path = settings.FILES_PATH['hosted_files_root']
    log.info(f'Hosted Files Path: {hosted_files_path}')
    log.debug(shutil.disk_usage(hosted_files_path))
    log.debug(dir(file))
    log.debug(f'{file.filename}')

    file_info: dict = {
        'saved': None,
        'link_to_type': link_to_type,
        'link_to_id': link_to_id,
        'link_to_id_random': link_to_id_random,
        'filename': file.filename,
        'extension': guess_file_extension(filename=file.filename),
    }

    if check_allowed_extension:
        if allowed_file_extension(extension=file_info['extension'], extension_list=['jpg', 'png', 'webp']):
            file_info['extension_allowed'] = True
        else:
            file_info['extension_allowed'] = False
            file_info['saved'] = False
            return file_info
    else:
        file_info['extension_allowed'] = None

    # There is a difference between Content-Type and MIME type.
    # https://stackoverflow.com/questions/3452381/whats-the-difference-of-contenttype-and-mimetype
    file_info['content_type'] = file.content_type  # might also include charset or other parameters

    # Determine the size by seeking to the end of the underlying file object.
    file.file.seek(0, os.SEEK_END)
    file_size = file.file.tell()
    file.file.seek(0)  # The file will not properly save if seek is not reset to 0.
    log.debug(file_size)
    file_info['size'] = file_size

    file_hash = await get_file_object_hash(file.file)
    log.debug(file_hash)
    file_info['hash_sha256'] = file_hash

    # 16384 bytes is the default
    # 4096 8192 16384 32768 65536 131072 262144 524288 1048576 bytes
    buffer_size = 524288

    f_src = file.file  # Don't need to do open(file_src, 'rb') since it is already "open"

    file_hash_subdirectory = file_hash[0:2]
    subdirectory_dest = os.path.join(hosted_files_path, file_hash_subdirectory)
    log.debug(subdirectory_dest)
    pathlib.Path(subdirectory_dest).mkdir(parents=True, exist_ok=True)
    file_info['subdirectory_path'] = file_hash_subdirectory

    file_dest = os.path.join(hosted_files_path, f'{file_hash}.file')
    file_dest_w_subdir = os.path.join(subdirectory_dest, f'{file_hash}.file')

    existing_file_check = pathlib.Path(file_dest)
    existing_file_check_subdir = pathlib.Path(file_dest_w_subdir)

    if existing_file_check.exists():
        log.warning('This file already exists at the destination without the subdirectory. Not re-saving. Going to move the current file and update the database later.')
        file_info['already_exists'] = True
        file_info['already_exists_subdir'] = False
        try:
            log.info('Moving file to sub directory destination...')
            timer_start = time.process_time()
            shutil.move(existing_file_check, existing_file_check_subdir)
            timer_end = time.process_time()
            elapsed_time = timer_end - timer_start
            log.debug(f'Elapsed time: {elapsed_time}')
            file_info['copy_timer'] = elapsed_time
            file_info['saved'] = True
            log.info(f'File moved to: {hosted_files_path}')
        except Exception as e:
            _log_exception_details(e)
            file_info['copy_timer'] = 0
            file_info['saved'] = False
    elif existing_file_check_subdir.exists():
        log.warning('This file already exists at the destination with the subdirectory. Not re-saving.')
        file_info['already_exists'] = True
        file_info['already_exists_subdir'] = True
        file_info['copy_timer'] = 0
        file_info['saved'] = True
    else:
        log.warning('This file does not already exist at the destination with or without the subdirectory.')
        file_info['already_exists'] = False
        file_info['already_exists_subdir'] = False
        try:
            log.info('Saving file to destination...')
            # The context manager guarantees the destination handle is closed
            # (and flushed) even when the copy fails.
            with open(file_dest_w_subdir, 'wb') as f_dest:
                timer_start = time.process_time()
                shutil.copyfileobj(f_src, f_dest, buffer_size)
                timer_end = time.process_time()
            elapsed_time = timer_end - timer_start
            log.debug(f'Elapsed time: {elapsed_time}')
            file_info['copy_timer'] = elapsed_time
            file_info['saved'] = True
            log.info(f'File saved to: {hosted_files_path}')
        except Exception as e:
            _log_exception_details(e)
            file_info['copy_timer'] = 0
            file_info['saved'] = False
            # Remove a partially written file; otherwise the next upload of
            # the same content would be treated as "already exists" and the
            # truncated copy would be kept forever.
            try:
                existing_file_check_subdir.unlink(missing_ok=True)
            except OSError as cleanup_error:
                log.warning(f'Could not remove the partially written file: {file_dest_w_subdir} ({cleanup_error})')
            return False

    log.info(f'Disk usage: {shutil.disk_usage(hosted_files_path)}')
    log.info(f"Filename: {file_info['filename']}")
    log.info(f"Subdirectory Path: {file_info['subdirectory_path']}")
    log.debug(file_info)
    log.debug(shutil.disk_usage(hosted_files_path))
    return file_info
# ### END ### API Hosted File Route ### save_file() ###


# ### BEGIN ### API Hosted File Methods ### create_hosted_file_link() ###
# Updated 2022-08-09
@logger_reset
def create_hosted_file_link(
        account_id: int|str,
        hosted_file_id: int|str,
        link_to_type: str,
        link_to_id: int|str,
    ):
    """Insert a ``hosted_file_link`` row tying a hosted file to another record.

    All three identifiers are passed through ``redis_lookup_id_random`` first;
    *link_to_type* doubles as the table name for the *link_to_id* lookup.

    Returns:
        ``True`` when ``sql_insert`` returns a truthy value, ``None`` when it
        returns ``None`` (logged as "probably already exists"), and ``False``
        when an ID lookup fails or the insert returns another falsy value.
    """
    log.setLevel(logging.INFO)  # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL
    log.debug(locals())

    account_id = redis_lookup_id_random(record_id_random=account_id, table_name='account')
    if not account_id:
        return False

    hosted_file_id = redis_lookup_id_random(record_id_random=hosted_file_id, table_name='hosted_file')
    if not hosted_file_id:
        return False

    link_to_id = redis_lookup_id_random(record_id_random=link_to_id, table_name=link_to_type)
    if not link_to_id:
        return False

    hosted_file_link_data: dict = {
        'account_id': account_id,
        'hosted_file_id': hosted_file_id,
        'link_to_type': link_to_type,
        'link_to_id': link_to_id,
    }

    # NOTE: Currently sql_insert does not handle all successful inserts correctly.
    # If there is not an autonum ID then it will return 0 as the ID.
    hosted_file_link_data_in_result = sql_insert(
        data=hosted_file_link_data,
        table_name='hosted_file_link',
        id_random_length=0,
    )
    if hosted_file_link_data_in_result:
        log.info('The hosted_file_link was created.')
    elif hosted_file_link_data_in_result is None:
        log.info('The hosted_file_link probably already exists.')
        return None
    else:
        # This should be improved
        log.warning('Because the hosted_file_link table does not have a primary autonum this check is incorrect even when successful.')
        log.warning('Something may have gone wrong while trying to create the hosted_file_link record.')
        log.warning('The hosted_file_link was probably created fine though.')
        return False

    log.debug(hosted_file_link_data_in_result)
    return True
# ### END ### API Hosted File Methods ### create_hosted_file_link() ###


# ### BEGIN ### API Hosted File Methods ### handle_delete_hosted_file() ###
# Updated 2022-08-09
@logger_reset
def handle_delete_hosted_file(
        account_id: int|str,
        hosted_file_id: int|str,
        link_to_type: str|None = None,
        link_to_id: int|str|None = None,
        rm_all_links: bool = False,
        rm_orphan: bool = False,
    ):
    """Remove a hosted-file link and, optionally, the orphaned file itself.

    Steps:
        1. When both *link_to_type* and *link_to_id* are given, delete that
           link record (a missing link is tolerated).
        2. When *rm_orphan* is false, stop here.
        3. Otherwise, if no link records remain for the file, delete the file
           from disk (if present) and then delete the ``hosted_file`` record.

    Args:
        rm_all_links: Accepted for interface compatibility; not used by the
            current implementation.

    Returns:
        ``True`` on success (including "links remain, nothing more to do"),
        ``None`` when the hosted file object or record was not found, and
        ``False`` on any failure.
    """
    log.setLevel(logging.INFO)  # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL
    log.debug(locals())

    account_id = redis_lookup_id_random(record_id_random=account_id, table_name='account')
    if not account_id:
        return False

    hosted_file_id = redis_lookup_id_random(record_id_random=hosted_file_id, table_name='hosted_file')
    if not hosted_file_id:
        return False

    # ### SECTION ### Handle links
    # NOTE: If link_to_type and link_to_id passed then try and remove that link record first.
    if link_to_type and link_to_id:
        hosted_file_link_result = delete_hosted_file_link(
            account_id=account_id,
            hosted_file_id=hosted_file_id,
            link_to_type=link_to_type,
            link_to_id=link_to_id,
        )
        if hosted_file_link_result:
            log.info('The hosted file link record was deleted.')
        elif hosted_file_link_result is None:
            # Not fatal: keep going so the orphan check can still run.
            log.warning('The hosted file link record was not found and may have already been deleted. Odd, but this can happen. event_file has a trigger to delete hosted_file_link when being deleted.')
        else:
            log.error('Something went wrong while trying to delete the hosted file link record.')
            return False

    # ### SECTION ### Handle orphan check and deletion of hosted_file record and file on server
    # NOTE: If not rm_orphan then do nothing else.
    # NOTE: If rm_orphan then get list of links for file.
    # NOTE: If 0 links result then delete the hosted_file record and file on the server.
    # NOTE: If >0 links result then do nothing else.
    if not rm_orphan:
        log.info('Removed hosted file link. No orphan check.')
        return True

    hosted_file_obj = load_hosted_file_obj(
        hosted_file_id=hosted_file_id,
        inc_hosted_file_link_list=True,
    )
    if hosted_file_obj:
        log.info('Hosted File object loaded.')
    elif hosted_file_obj is None:
        log.warning('Hosted File object not found. Can not attempt to delete file from the server if there is one.')
        return None
    else:
        log.error('Something went wrong while trying to load the Hosted File object.')
        return False
    log.debug(hosted_file_obj)

    # NOTE: Check and remove orphan
    hosted_file_link_rec_list_result = get_hosted_file_link_rec_list(hosted_file_id=hosted_file_id)
    if hosted_file_link_rec_list_result:
        # NOT safe to delete the hosted_file record and file from server.
        log.info('This hosted file has linked records to it.')
        log.info('Removed hosted file link (above). Still not an orphan file.')
        return True
    elif isinstance(hosted_file_link_rec_list_result, list) or hosted_file_link_rec_list_result is None:
        log.info('This hosted file has no link records to it.')
    else:
        log.error('Something went wrong while trying to get a list of the hosted file link records.')
        return False

    # ### Orphan file: ### Delete file from server
    hosted_files_path = settings.FILES_PATH['hosted_files_root']
    log.info(f'Hosted Files Path: {hosted_files_path}')

    subdir_path = hosted_file_obj.subdirectory_path
    hash_sha256 = hosted_file_obj.hash_sha256
    hash_filename = hash_sha256 + '.file'

    if subdir_path:
        full_subdirectory_path = os.path.join(hosted_files_path, subdir_path)
    else:
        full_subdirectory_path = hosted_files_path
    log.debug(full_subdirectory_path)

    file_path_w_subdir = os.path.join(full_subdirectory_path, hash_filename)
    log.info(f'Full file path with subdirectory: {file_path_w_subdir}')

    if os.path.exists(file_path_w_subdir):
        log.info('File exists!')
        log.info('Going remove the file if it is an orphan...')
        try:
            pathlib.Path(file_path_w_subdir).unlink()
        except OSError as e:
            # Previously referenced an undefined name here, which raised
            # NameError and hid the real OS error.
            log.error("Error: %s : %s" % (file_path_w_subdir, e.strerror))
            return False
    else:
        # A missing file is not fatal; the record is still removed below.
        log.warning(f'The hosted file was not found on the server. Hash: {hash_sha256}')

    # ### Orphan file: ### Delete hosted_file record
    sql = """
        DELETE FROM hosted_file
        WHERE hosted_file.id = :hosted_file_id
    """
    log.debug(sql)

    hosted_file_data = {'hosted_file_id': hosted_file_id}
    log.debug(hosted_file_data)

    hosted_file_delete_result = sql_delete(sql=sql, data=hosted_file_data)
    if hosted_file_delete_result:
        log.info(f'Deleted Hosted File record. Hosted File ID: {hosted_file_id}')
        return True
    if hosted_file_delete_result is None:
        log.warning(f'Hosted File record was not found and may have already been removed. Hosted File ID: {hosted_file_id}')
        return None
    log.error('Something went wrong while trying to delete the hosted file record.')
    return False
# ### END ### API Hosted File Methods ### handle_delete_hosted_file() ###


# ### BEGIN ### API Hosted File Methods ### delete_hosted_file_link() ###
# Updated 2022-08-09
@logger_reset
def delete_hosted_file_link(
        account_id: int|str,
        hosted_file_id: int|str,
        link_to_type: str,
        link_to_id: int|str,
    ):
    """Delete the ``hosted_file_link`` row matching file, link type and link ID.

    *account_id* is accepted for interface compatibility but is not part of
    the delete condition.

    Returns:
        ``True`` when ``sql_delete`` returns a truthy value, ``None`` when it
        returns ``None``, and ``False`` when an ID lookup fails or the delete
        returns another falsy value.
    """
    log.setLevel(logging.INFO)  # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL
    log.debug(locals())

    hosted_file_id = redis_lookup_id_random(record_id_random=hosted_file_id, table_name='hosted_file')
    if not hosted_file_id:
        return False

    link_to_id = redis_lookup_id_random(record_id_random=link_to_id, table_name=link_to_type)
    if not link_to_id:
        return False

    sql = """
        DELETE FROM hosted_file_link
        WHERE hosted_file_id = :hosted_file_id
            AND link_to_type = :link_to_type
            AND link_to_id = :link_to_id
    """
    log.debug(sql)

    hosted_file_link_data = {
        'hosted_file_id': hosted_file_id,
        'link_to_type': link_to_type,
        'link_to_id': link_to_id,
    }
    log.debug(hosted_file_link_data)

    hosted_file_delete_result = sql_delete(sql=sql, data=hosted_file_link_data)
    if hosted_file_delete_result is None:
        return None
    if not hosted_file_delete_result:
        return False
    log.info(f'Deleted Hosted File Link. Hosted File ID: {hosted_file_id}, Link To Type: {link_to_type}, Link To ID: {link_to_id}')
    return True
# ### END ### API Hosted File Methods ### delete_hosted_file_link() ###


# ### BEGIN ### API Hosted File Methods ### get_hosted_file_rec_list() ###
# This needs to be improved. Currently it does not really do anything.
# Need to allow for list by account? Probably have the same actual hosted file have two hosted_file entries if it was uploaded for two separate accounts.
# Updated 2022
@logger_reset
def get_hosted_file_rec_list(
        for_obj_type: str,
        for_obj_id: str,
        limit: int = 1000,
        enabled: str = 'enabled',  # enabled, disabled, all
    ) -> list|bool:
    """List ``hosted_file`` IDs whose ``<for_obj_type>_id`` column equals *for_obj_id*.

    Args:
        for_obj_type: Used both as the lookup table name and to build the
            ``<for_obj_type>_id`` column name.
        for_obj_id: Identifier passed through ``redis_lookup_id_random``.
        limit: Maximum number of rows; a falsy value removes the LIMIT clause.
        enabled: 'enabled', 'disabled' or 'all'.

    Returns:
        The list returned by ``sql_select`` (``[]`` when it is empty/None), or
        ``False`` when the ID lookup fails, *enabled* is not a recognised
        value, or ``sql_select`` itself returns ``False``.
    """
    log.setLevel(logging.WARNING)  # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL
    log.debug(locals())

    for_obj_id = redis_lookup_id_random(record_id_random=for_obj_id, table_name=for_obj_type)
    if not for_obj_id:
        return False

    data = {f'{for_obj_type}_id': for_obj_id}

    # NOTE(review): for_obj_type is interpolated into the SQL text as an
    # identifier. Callers must only pass trusted, known table names.
    sql_obj_type_id = f'`tbl`.{for_obj_type}_id = :{for_obj_type}_id'

    enable_flag_by_mode = {'enabled': True, 'disabled': False}
    if enabled == 'all':
        sql_enabled = ''
    elif enabled in enable_flag_by_mode:
        data['enable'] = enable_flag_by_mode[enabled]
        sql_enabled = 'AND `tbl`.enable = :enable'
    else:
        # Previously an unknown value left sql_enabled unbound and crashed
        # while building the query.
        log.error(f'Invalid value for enabled: {enabled}')
        return False

    if limit:
        data['limit'] = limit
        sql_limit = 'LIMIT :limit'
    else:
        sql_limit = ''

    sql = f"""
        SELECT `tbl`.id AS 'hosted_file_id', `tbl`.id_random AS 'hosted_file_id_random'
        FROM `hosted_file` AS `tbl`
        WHERE {sql_obj_type_id}
            {sql_enabled}
        ORDER BY `tbl`.created_on DESC, `tbl`.updated_on DESC
        {sql_limit};
    """

    hosted_file_rec_li_result = sql_select(data=data, sql=sql, as_list=True)
    if hosted_file_rec_li_result is False:
        # Assumes sql_select follows this module's convention (None = not
        # found, False = error) when as_list=True - TODO confirm.
        log.error('Something went wrong while trying to select the hosted file records.')
        return False
    hosted_file_rec_li = hosted_file_rec_li_result if hosted_file_rec_li_result else []

    log.setLevel(logging.DEBUG)  # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL
    log.debug(hosted_file_rec_li_result)
    return hosted_file_rec_li
# ### END ### API Hosted File Methods ### get_hosted_file_rec_list() ###


# ### BEGIN ### API Hosted File Methods ### get_hosted_file_link_rec_list() ###
# Updated 2022-08-09
@logger_reset
def get_hosted_file_link_rec_list(
        hosted_file_id: int|str,
        link_to_type: str|None = None,
        link_to_id: int|str|None = None,
        limit: int = 10,
        offset: int = 0,
        enabled: str = 'enabled',  # enabled, disabled, all
    ) -> list|bool:
    """List ``hosted_file_link`` rows for *hosted_file_id*, newest first.

    Args:
        hosted_file_id: Used as-is in the query (no ID lookup is performed).
        limit, offset: Forwarded to ``sql_limit_offset_part``.
        link_to_type, link_to_id, enabled: Accepted for interface
            compatibility; not used by the current implementation.

    Returns:
        The list returned by ``sql_select`` (``[]`` when it is empty/None), or
        ``False`` when ``sql_select`` returns ``False``. Propagating ``False``
        matters to handle_delete_hosted_file(): a failed query must not look
        like "no links", which would allow the file to be deleted as an orphan.
    """
    log.setLevel(logging.WARNING)  # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL
    log.debug(locals())

    data = {'hosted_file_id': hosted_file_id}

    sql_limit = sql_limit_offset_part(limit=limit, offset=offset)  # Reasonably safe return str

    sql = f"""
        SELECT *
        FROM `hosted_file_link` AS `hosted_file_link`
        WHERE `hosted_file_link`.hosted_file_id = :hosted_file_id
        ORDER BY `hosted_file_link`.created_on DESC, `hosted_file_link`.updated_on DESC
        {sql_limit};
    """

    hosted_file_link_rec_li_result = sql_select(data=data, sql=sql, as_list=True)
    if hosted_file_link_rec_li_result is False:
        # Assumes sql_select follows this module's convention (None = not
        # found, False = error) when as_list=True - TODO confirm.
        log.error('Something went wrong while trying to select the hosted file link records.')
        return False
    hosted_file_link_rec_li = hosted_file_link_rec_li_result if hosted_file_link_rec_li_result else []

    log.setLevel(logging.DEBUG)  # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL
    log.debug(hosted_file_link_rec_li_result)
    return hosted_file_link_rec_li
# ### END ### API Hosted File Methods ### get_hosted_file_link_rec_list() ###