import aiofiles, datetime, hashlib, mimetypes, os, pathlib, shutil, time from fastapi import APIRouter, Body, Depends, File, Form, Header, HTTPException, Query, Response, status, UploadFile from fastapi.responses import FileResponse, StreamingResponse # from fastapi.responses import StreamingResponse # from baize.asgi.responses import FileResponse # from baize.wsgi.responses import FileResponse from pydantic import BaseModel, EmailStr, Field from typing import Dict, List, Optional, Set, Union from pdf2image import convert_from_path from app.lib_general import log, logging, common_route_params, Common_Route_Params, common_route_params_min, Common_Route_Params_Min from app.config import settings from app.db_sql import sql_insert, sql_update, sql_insert_or_update, sql_select, sql_delete, redis_lookup_id_random # from .api_crud import delete_obj_template, get_obj_template, get_obj_li_template, patch_obj_template, post_obj_template from app.methods.hosted_file_methods import create_hosted_file_obj, handle_delete_hosted_file, load_hosted_file_obj, save_file, save_file_to_hosted_file, create_hosted_file_link, delete_hosted_file_link, get_hosted_file_link_rec_list, lookup_file_hash from app.models.hosted_file_models import Hosted_File_Base from app.models.response_models import Resp_Body_Base, mk_resp router = APIRouter() # ### BEGIN ### API Hosted File ### directory_check() ### # This can be used to clean up the hosted_files directory. Currently it only looks for hashed files in the root, but that is kind of useless now. 2023-03-28 # This needs to be updated to delete orphan files (no records in the DB (dev, test, prod)). Careful... # I also need to clean up the DB side if there is no file in the hosted_files directory. Less concerning? 
# Updated 2023-03-28
@router.get('/directory_check', response_model=Resp_Body_Base)
async def directory_check(
    rm_orphan: bool = False,
    commons: Common_Route_Params = Depends(common_route_params),
):
    """Move hashed files found in the hosted-files root into 2-character subdirectories.

    Scans the root of ``settings.FILES_PATH['hosted_files_root']`` and, for each
    regular file whose name ends in ``.file``, moves it into a subdirectory named
    after the first two characters of its filename. At most 100 items are
    processed per call.

    Args:
        rm_orphan: Currently unused. Reserved for the orphan clean-up described
            in the TODO below.
        commons: Shared route parameters; only ``commons.response`` is used here.

    Returns:
        ``mk_resp`` payload with the original paths of the moved files, or a
        500 payload when the hosted-files directory does not exist.
    """
    log.setLevel(logging.INFO)  # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL
    log.debug(locals())

    max_items = 100  # Upper bound on directory entries handled per request.

    hosted_files_path = settings.FILES_PATH['hosted_files_root']
    log.info(f'Hosted Files Path: {hosted_files_path}')
    full_directory_path = hosted_files_path
    log.debug(full_directory_path)

    if not os.path.isdir(full_directory_path):
        log.warning('The hosted file directory was not found on the server.')
        # The response must be returned; previously it was built and then discarded.
        return mk_resp(data=False, status_code=500, response=commons.response, status_message='Something may have gone wrong while trying to check the hosted file directory.')  # Internal Server Error

    log.info('Path exists! Going to get a list of files...')
    result_list = []
    count = 0
    for directory_item in os.listdir(full_directory_path):
        if count >= max_items:
            break
        file_path_w_item = os.path.join(full_directory_path, directory_item)

        if os.path.isfile(file_path_w_item):
            # ### Found file ###
            # Hashed files are stored as '<sha256>.file'; match the suffix rather
            # than a substring so names such as 'x.file.bak' are not moved.
            if not directory_item.endswith('.file'):
                log.warning(f'Not a hashed file! File: {directory_item}')
                continue  # Skipped items do not count toward max_items.

            log.info(f'Hosted hashed file found: {directory_item}')
            result_list.append(file_path_w_item)

            # Create a subdirectory with the first 2 characters of the hash
            full_subdirectory_path = os.path.join(full_directory_path, directory_item[:2])
            log.info(f'Making directory: {full_subdirectory_path}')
            os.makedirs(full_subdirectory_path, exist_ok=True)

            # Move the file to the subdirectory
            log.info(f'Moving to: {full_subdirectory_path}')
            shutil.move(file_path_w_item, os.path.join(full_subdirectory_path, directory_item))

            # TODO: Orphan handling (not implemented). The earlier draft looked up each
            # hash with lookup_file_hash(file_hash=<name without '.file'>) and, when no
            # DB record existed and rm_orphan was set, unlinked the file. Careful: the
            # same directory may be shared by dev, test and prod databases.
        else:
            # ### Found directory ###
            # TODO: Walk the subdirectory and apply the same orphan checks.
            log.info('Subdirectory Path exists! Going to get a list of files... [LATER]')

        count = count + 1

    return mk_resp(data=result_list, response=commons.response, status_message='The hosted file directory check.')
# ### END ### API Hosted File ### directory_check() ###


def _hosted_file_disk_path(hosted_files_path: str, subdir_path: Optional[str], hash_sha256: str) -> str:
    """Build the on-disk path of a hosted file: <root>[/<subdir>]/<sha256>.file.

    The containing directory is created when it is missing (behavior carried
    over from the original routes).
    """
    if subdir_path:
        full_subdirectory_path = os.path.join(hosted_files_path, subdir_path)
    else:
        full_subdirectory_path = hosted_files_path
    log.debug(full_subdirectory_path)
    pathlib.Path(full_subdirectory_path).mkdir(parents=True, exist_ok=True)

    file_path_w_subdir = os.path.join(full_subdirectory_path, hash_sha256 + '.file')
    log.info(f'Full file path with subdirectory: {file_path_w_subdir}')
    return file_path_w_subdir


# ### BEGIN ### API Hosted File ### download_hosted_file() ###
# Updated 2022-08-08
# NOTE(review): hosted_file_id is a path parameter but is declared with Query(...).
# Recent FastAPI releases reject this at start-up; confirm the pinned version or
# switch to fastapi.Path.
@router.get('/{hosted_file_id}/download', response_model=Resp_Body_Base)
async def download_hosted_file(
    hosted_file_id: str = Query(..., min_length=11, max_length=22),
    filename: str = Query(None, min_length=4, max_length=100),
    # streaming: bool = False,
    commons: Common_Route_Params = Depends(common_route_params),
):
    """Send a hosted file to the client as a download.

    Args:
        hosted_file_id: Random string ID of the hosted_file record.
        filename: Optional download filename; defaults to the record's filename.
        commons: Shared route parameters; only ``commons.response`` is used here.

    Returns:
        A ``FileResponse`` for the stored file, a 404 payload when the ID or the
        file on disk is not found, or a 400 payload when the record cannot be loaded.
    """
    log.setLevel(logging.INFO)  # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL
    log.debug(locals())

    # ### SECTION ### Secondary data validation
    hosted_file_id = redis_lookup_id_random(record_id_random=hosted_file_id, table_name='hosted_file')
    if not hosted_file_id:
        return mk_resp(data=None, status_code=404, response=commons.response, status_message='The hosted_file ID was invalid or not found.')

    hosted_files_path = settings.FILES_PATH['hosted_files_root']
    log.info(f'Hosted Files Path: {hosted_files_path}')

    hosted_file_obj = load_hosted_file_obj(
        hosted_file_id = hosted_file_id,
        # inc_hosted_file = True,
        inc_hosted_file_link_list = True,
    )
    if not hosted_file_obj:
        return mk_resp(data=False, status_code=400, response=commons.response)  # Bad Request

    if not filename:
        filename = hosted_file_obj.filename
    log.info(f'Filename: {filename}')

    hash_sha256 = hosted_file_obj.hash_sha256
    file_path_w_subdir = _hosted_file_disk_path(hosted_files_path, hosted_file_obj.subdirectory_path, hash_sha256)

    if not os.path.exists(file_path_w_subdir):
        log.error(f'The hosted file was not found on the server. Hash: {hash_sha256}')
        return mk_resp(data=None, status_code=404, response=commons.response, status_message=f'The hosted file was not found on the server. Hash: {hash_sha256}')  # Not Found

    log.info('Hosted file found on server.')
    return FileResponse(file_path_w_subdir, filename=filename)
# ### END ### API Hosted File ### download_hosted_file() ###


# ### BEGIN ### API Hosted File ### file_streamer() ###
# Updated 2023-08-18
async def file_streamer(path: str, start: int, end: int, chunk_size: int = 8192):
    """Asynchronously yield the bytes of ``path`` in the half-open range [start, end).

    Args:
        path: File to read.
        start: Offset of the first byte to send.
        end: Offset one past the last byte to send.
        chunk_size: Maximum number of bytes per yielded chunk (default 8 KB).

    Yields:
        ``bytes`` chunks; stops early if the file ends before ``end``.
    """
    remaining = end - start
    async with aiofiles.open(path, mode='rb') as f:
        await f.seek(start)
        # Track the remaining byte count locally instead of awaiting f.tell()
        # before every read.
        while remaining > 0:
            data = await f.read(min(chunk_size, remaining))
            if not data:
                break
            remaining -= len(data)
            yield data
# ### END ### API Hosted File ### file_streamer() ###


def _parse_byte_range(range_header: str, file_size: int):
    """Parse a single-range HTTP ``Range`` header value.

    Supports ``bytes=START-END``, ``bytes=START-`` and the suffix form
    ``bytes=-LENGTH``. A value without a unit (``START-END``) is accepted as
    bytes, which the original parsing also tolerated.

    Returns:
        ``(start, end)`` with ``end`` inclusive and clamped to the file size, or
        ``None`` when the header is malformed, uses another unit, requests
        multiple ranges, or has end < start. The caller should then ignore the
        header and send the whole file. ``start`` may be >= ``file_size``; the
        caller is expected to answer 416 in that case.
    """
    unit, separator, spec = range_header.strip().partition('=')
    if not separator:
        unit, spec = 'bytes', unit
    if unit.strip().lower() != 'bytes' or ',' in spec:
        return None

    first, dash, last = spec.strip().partition('-')
    first, last = first.strip(), last.strip()
    if not dash or (first and not first.isdigit()) or (last and not last.isdigit()):
        return None

    if first:
        start = int(first)
        end = int(last) if last else file_size - 1
        if end < start:
            return None
    elif last:
        # Suffix range: the final <last> bytes of the file.
        start = max(file_size - int(last), 0) if int(last) else file_size
        end = file_size - 1
    else:
        return None

    return start, min(end, file_size - 1)


# ### BEGIN ### API Hosted File ### stream_hosted_file() ###
# Updated 2023-08-18
# NOTE(review): hosted_file_id is a path parameter but is declared with Query(...).
# Recent FastAPI releases reject this at start-up; confirm the pinned version or
# switch to fastapi.Path.
@router.get('/{hosted_file_id}/stream_v2')
async def stream_hosted_file(
    hosted_file_id: str = Query(..., min_length=11, max_length=22),
    filename: str = Query(None, min_length=4, max_length=100),
    # streaming: bool = True,
    range: Optional[str] = Header(None),  # Name must stay `range`: it maps to the Range header.
    commons: Common_Route_Params = Depends(common_route_params),
):
    """Stream a hosted file, honouring a single-range HTTP ``Range`` header.

    Args:
        hosted_file_id: Random string ID of the hosted_file record.
        filename: Optional filename used to guess the media type; defaults to
            the record's filename.
        range: Value of the ``Range`` request header. Optional; when absent or
            unusable the whole file is streamed with status 200.
        commons: Shared route parameters; only ``commons.response`` is used here.

    Returns:
        A ``StreamingResponse`` (206 for a satisfiable range, 200 otherwise), a
        404 payload when the ID or the file on disk is not found, or a 400
        payload when the record cannot be loaded.

    Raises:
        HTTPException: 416 when the requested range starts beyond the end of the file.
    """
    log.setLevel(logging.INFO)  # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL
    log.debug(locals())

    # ### SECTION ### Secondary data validation
    hosted_file_id = redis_lookup_id_random(record_id_random=hosted_file_id, table_name='hosted_file')
    if not hosted_file_id:
        return mk_resp(data=None, status_code=404, response=commons.response, status_message='The hosted_file ID was invalid or not found.')

    hosted_files_path = settings.FILES_PATH['hosted_files_root']
    log.info(f'Hosted Files Path: {hosted_files_path}')

    hosted_file_obj = load_hosted_file_obj(
        hosted_file_id = hosted_file_id,
        # inc_hosted_file = True,
        inc_hosted_file_link_list = True,
    )
    if not hosted_file_obj:
        return mk_resp(data=False, status_code=400, response=commons.response)  # Bad Request

    if not filename:
        filename = hosted_file_obj.filename
    log.info(f'Filename: {filename}')

    hash_sha256 = hosted_file_obj.hash_sha256
    file_path_w_subdir = _hosted_file_disk_path(hosted_files_path, hosted_file_obj.subdirectory_path, hash_sha256)

    if not os.path.exists(file_path_w_subdir):
        log.error(f'The hosted file was not found on the server. Hash: {hash_sha256}')
        return mk_resp(data=None, status_code=404, response=commons.response, status_message=f'The hosted file was not found on the server. Hash: {hash_sha256}')  # Not Found

    log.info('Hosted file found on server.')
    file_size = os.stat(file_path_w_subdir).st_size
    media_type = mimetypes.guess_type(filename)[0]

    byte_range = _parse_byte_range(range, file_size) if range else None
    if byte_range is None:
        # No usable Range header: a server may ignore it and send the full body.
        return StreamingResponse(
            file_streamer(file_path_w_subdir, 0, file_size),
            media_type = media_type,
            status_code = status.HTTP_200_OK,
            headers = {
                'Accept-Ranges': 'bytes',
                'Content-Length': str(file_size),
            }
        )

    start, end = byte_range
    if start >= file_size:
        raise HTTPException(
            status_code=status.HTTP_416_REQUESTED_RANGE_NOT_SATISFIABLE,
            headers={'Content-Range': f'bytes */{file_size}'},
        )

    content_length = end - start + 1
    return StreamingResponse(
        file_streamer(file_path_w_subdir, start, end + 1),  # file_streamer's end is exclusive.
        media_type = media_type,
        status_code = status.HTTP_206_PARTIAL_CONTENT,
        headers = {
            'Accept-Ranges': 'bytes',
            'Content-Range': f'bytes {start}-{end}/{file_size}',
            'Content-Length': str(content_length),
        }
    )
# ### END ### API Hosted File ### stream_hosted_file() ###

# NOTE: The commented-out legacy '/{hosted_file_id}/stream' route (Updated 2023-08-17,
# based on baize.asgi.responses.FileResponse) was removed as dead code; the
# '/{hosted_file_id}/stream_v2' route above is the active implementation.
# Recover it from version control if it is needed again.

# ### BEGIN ### API Hosted File Route ### upload_files() ###
# This just needs to return the correct model for a hosted_file
# Everything else seems to be working well
# Should this also do something with meta data and updating the DB?
@router.post('/upload_files')
async def upload_files(
    file_list: List[UploadFile] = File(...),
    account_id: str = Form(..., min_length=1, max_length=22),
    # filename: Optional[str] = Form(...),
    link_to_type: str = Form(...),
    link_to_id: str = Form(..., min_length=1, max_length=22),
    check_allowed_extension: bool = False,
    # create_hosted_file_link: bool = True,
    x_account_id: str = Header(..., ),
    return_obj: bool = True,
    by_alias: bool = True,
    exclude_unset: bool = True,
    response: Response = Response,
):
    """Save uploaded files, register them as hosted_file records and link them.

    For every uploaded file: store it with ``save_file``, find or create the
    matching hosted_file record (matched by SHA-256 hash), and create a
    hosted_file_link to the target object unless the target is an event-module
    type, where the original code expects a trigger to create the link.

    Args:
        file_list: Uploaded files.
        account_id: Random string ID of the account.
        link_to_type: Table/type name of the object the files are linked to.
        link_to_id: Random string ID of the object the files are linked to.
        check_allowed_extension: Passed through to ``save_file``.
        x_account_id: Required header; not read by this function.
        return_obj, by_alias, exclude_unset: Accepted but not read by this function.
        response: Response object handed to ``mk_resp``.

    Returns:
        ``mk_resp`` payload with one dict per uploaded file, or a 400 payload
        when the account or the link target cannot be resolved.
    """
    log.setLevel(logging.DEBUG)  # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL
    log.debug(locals())

    account_id_random = account_id  # This is for the account random str ID
    account_id = redis_lookup_id_random(record_id_random=account_id, table_name='account')
    if not account_id:
        return mk_resp(data=None, status_code=400, response=response)

    link_to_id_random = link_to_id  # This is for the object random str ID
    link_to_id = redis_lookup_id_random(record_id_random=link_to_id, table_name=link_to_type)
    if not link_to_id:
        return mk_resp(data=None, status_code=400, response=response)

    # Link types whose hosted_file_link record is created by a trigger.
    event_link_types = ['event', 'event_location', 'event_session', 'event_presentation', 'event_presenter', 'event_badge', 'event_exhibit', 'event_person']

    hosted_file_list = []
    for file_obj in file_list:
        file_info = await save_file(
            file = file_obj,
            account_id = account_id,
            account_id_random = account_id_random,
            link_to_type = link_to_type,
            link_to_id = link_to_id,
            link_to_id_random = link_to_id_random,
            check_allowed_extension = check_allowed_extension,
        )

        if file_info['saved']:
            # Create a new host_file object entry
            log.info('Check and create a new host_file object entry...')
            log.info('Look up in DB based on hash...')
            hosted_file_sel_result = _select_hosted_file_by_hash(file_info['hash_sha256'])

            if hosted_file_sel_result:
                if not file_info['already_exists']:
                    # Odd... the hash was found in the database, but the file had to be copied again.
                    # If this happens then the file on the host server was probably deleted at some point.
                    log.warning('Found an existing host_file object_entry in the DB but the file was not found on the server!')
                # NOTE(review): this route uses 'id_random' as the record ID (also as the
                # 'id' for sql_update below) while upload_files_fake() uses 'id'. Confirm
                # which one load_hosted_file_obj()/sql_update() expect.
                hosted_file_id = hosted_file_sel_result.get('id_random', None)
                hosted_file_dict = load_hosted_file_obj(hosted_file_id=hosted_file_id, model_as_dict=True)
                if file_info['already_exists']:
                    log.debug(hosted_file_dict)
                    _sync_subdirectory_path(hosted_file_id, hosted_file_dict, file_info)
            else:
                if file_info['already_exists']:
                    # NOTE: SOMETHING WENT WRONG
                    # Going to try and create a new host_file entry...
                    log.warning('For some reason a host_file object entry with the has was not found.')
                # Otherwise this is normal: not on the host server and not in the DB.
                file_info['account_id'] = account_id
                file_info['account_id_random'] = account_id_random
                hosted_file_id, hosted_file_dict = _create_hosted_file_record(file_info)
                log.debug(hosted_file_sel_result)
        else:
            file_info['id_random'] = None
            hosted_file_id = None
            hosted_file_dict = _hosted_file_dict_without_ids(Hosted_File_Base(**file_info))

        _copy_upload_status(hosted_file_dict, file_info)
        hosted_file_list.append(hosted_file_dict)

        if hosted_file_id is None:
            # The file was not saved or no record could be created: there is nothing to link.
            log.warning('No hosted_file record is available for this file; skipping the hosted_file_link.')
            continue

        # NOTE: Currently sql_insert does not handle all successful inserts correctly. If there is not an autonum ID then it will return 0 as the ID.
        if link_to_type in event_link_types:
            log.info('File is for event module. Trigger will create the hosted_file_link record.')
        else:
            _link_hosted_file(account_id, hosted_file_id, link_to_type, link_to_id)

    log.debug(hosted_file_list)
    return mk_resp(data=hosted_file_list, response=response)
# ### END ### API Hosted File Route ### upload_files() ###


# ### BEGIN ### Shared helpers for upload_files() and upload_files_fake() ###
def _select_hosted_file_by_hash(hash_sha256):
    """Look up a hosted_file record by its SHA-256 hash via sql_select()."""
    return sql_select(
        table_name = 'hosted_file',
        field_name = 'hash_sha256',
        field_value = hash_sha256,
    )


def _hosted_file_dict_without_ids(hosted_file_obj):
    """Serialize a Hosted_File_Base model without its 'id' and 'id_random' fields."""
    return hosted_file_obj.dict(by_alias=True, exclude_unset=True, exclude={'id', 'id_random'})  # pylint: disable=no-member


def _create_hosted_file_record(file_info):
    """Create a hosted_file record from file_info.

    Returns:
        ``(hosted_file_id, hosted_file_dict)``. When the record cannot be
        created the ID is ``None`` and the dict is built from ``file_info`` alone.
    """
    hosted_file_obj = Hosted_File_Base(**file_info)
    hosted_file_obj_result = create_hosted_file_obj(hosted_file_obj_new=hosted_file_obj)
    if hosted_file_obj_result:
        hosted_file_id = hosted_file_obj_result
        hosted_file_dict = load_hosted_file_obj(hosted_file_id=hosted_file_id, model_as_dict=True)
    else:
        log.warning('For some reason a host_file object entry could not be created.')
        hosted_file_id = None
        hosted_file_dict = _hosted_file_dict_without_ids(hosted_file_obj)
    log.debug(hosted_file_obj_result)
    return hosted_file_id, hosted_file_dict


def _sync_subdirectory_path(hosted_file_id, hosted_file_dict, file_info):
    """Store file_info's subdirectory_path on the record when the record has none.

    New as of 2021-08-26
    NOTE: Working on moving all hosted files to subdirectories because there are
    a lot of files. The database needs to be updated if the file already exists
    and it does not exist in the new subdirectory.
    """
    log.debug(file_info['already_exists'])
    log.debug(file_info['already_exists_subdir'])
    log.debug(file_info['subdirectory_path'])

    if subdirectory_path := hosted_file_dict.get('subdirectory_path'):
        log.info(f'The new subdirectory_path field was found in the database record? Subdirectory Path: {subdirectory_path}')
        return

    subdirectory_path = file_info.get('subdirectory_path', None)
    if not subdirectory_path:
        log.warning('The new subdirectory_path field was not found in the database record or the passed file info.')
        return

    log.info(f'The new subdirectory_path field was not found in the database record. This needs to be updated. Subdirectory Path: {subdirectory_path}')
    hosted_file_data_up = {
        'id': hosted_file_id,
        'subdirectory_path': subdirectory_path,
    }
    hosted_file_up_result = sql_update(
        table_name = 'hosted_file',
        data = hosted_file_data_up,
    )
    if hosted_file_up_result:
        log.info(f'The hosted_file record has been updated with the new subdirectory_path. Hosted File ID: {hosted_file_id} Subdirectory Path: {subdirectory_path}')
    else:
        log.warning(f'The hosted_file record was probably not updated with the new subdirectory_path. Hosted File ID: {hosted_file_id} Subdirectory Path: {subdirectory_path}')
        log.debug(hosted_file_up_result)


def _copy_upload_status(hosted_file_dict, file_info):
    """Copy the per-upload status fields from file_info onto the response dict."""
    for key in ('extension_allowed', 'already_exists', 'saved', 'copy_timer', 'filename', 'extension'):
        hosted_file_dict[key] = file_info[key]


def _link_hosted_file(account_id, hosted_file_id, link_to_type, link_to_id):
    """Create the hosted_file_link record and log when the result looks like a failure."""
    if create_hosted_file_link(
        account_id = account_id,
        hosted_file_id = hosted_file_id,
        link_to_type = link_to_type,
        link_to_id = link_to_id,
    ):
        return  # This if statement should be improved
    # This if statement should be improved
    log.debug('Because the hosted_file_link table does not have a primary autonum this check is incorrect even when successful.')
    log.debug('Something may have gone wrong while trying to create the hosted_file_link record.')
    log.debug('The hosted_file_link was probably created fine though.')
# ### END ### Shared helpers for upload_files() and upload_files_fake() ###


# ### BEGIN ### API Hosted File Route ### upload_files_fake() ###
# This just needs to return the correct model for a hosted_file
# Everything else seems to be working well
# Should this also do something with meta data and updating the DB?
@router.post('/upload_files/fake')
async def upload_files_fake(
    file_info_li: list,
    account_id: str,
    # filename: Optional[str] = Form(...),
    link_to_type: str,
    link_to_id: Union[int, str],
    check_allowed_extension: bool = False,
    # create_hosted_file_link: bool = True,
    x_account_id: str = Header(..., ),
    return_obj: bool = True,
    by_alias: bool = True,
    exclude_unset: bool = True,
    response: Response = Response,
):
    """Register already-stored files from their file_info dicts, without uploading.

    Same bookkeeping as ``upload_files`` (find or create the hosted_file record
    by hash, then create the hosted_file_link), but driven by ``file_info``
    dicts supplied by the caller instead of by ``save_file``.

    Args:
        file_info_li: List of file_info dicts. The code reads the keys 'saved',
            'already_exists', 'already_exists_subdir', 'subdirectory_path',
            'hash_sha256', 'extension_allowed', 'copy_timer', 'filename' and
            'extension'.
        account_id: Random string ID of the account.
        link_to_type: Table/type name of the object the files are linked to.
        link_to_id: Random ID of the object the files are linked to.
        check_allowed_extension, x_account_id, return_obj, by_alias,
            exclude_unset: Accepted but not read by this function.
        response: Response object handed to ``mk_resp``.

    Returns:
        ``mk_resp`` payload with one dict per file_info entry, or a 400 payload
        when the account or the link target cannot be resolved.
    """
    log.setLevel(logging.INFO)  # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL
    log.debug(locals())
    log.debug(file_info_li)

    account_id = redis_lookup_id_random(record_id_random=account_id, table_name='account')
    if not account_id:
        return mk_resp(data=None, status_code=400, response=response, status_message='The Account ID was not found.')

    # Keep the submitted ID so the error message can report it; the lookup
    # result is falsy on failure.
    link_to_id_random = link_to_id  # This is for the object random str ID
    link_to_id = redis_lookup_id_random(record_id_random=link_to_id, table_name=link_to_type)
    if not link_to_id:
        return mk_resp(data=None, status_code=400, response=response, status_message=f'The ID for linking was not found. Link To Type: {link_to_type} Link To ID: {link_to_id_random}')

    hosted_file_list = []
    for file_info in file_info_li:
        log.setLevel(logging.DEBUG)  # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL
        log.debug(file_info)

        # "saved" means that the file was or is now saved on the file server
        if file_info['saved']:
            # Create a new host_file object entry
            log.info('Check and create a new host_file object entry...')
            log.info('Look up in DB based on hash...')
            hosted_file_sel_result = _select_hosted_file_by_hash(file_info['hash_sha256'])

            if hosted_file_sel_result:
                if not file_info['already_exists']:
                    # Odd... the hash was found in the database, but the file had to be copied again.
                    # If this happens then the file on the host server was probably deleted at some point.
                    log.warning('Found an existing host_file object_entry in the DB but the file was not found on the server!')
                # NOTE: Since something was in the database, it may need to be updated with the new subdirectory_path.
                hosted_file_id = hosted_file_sel_result.get('id', None)
                hosted_file_dict = load_hosted_file_obj(hosted_file_id=hosted_file_id, model_as_dict=True)
                _sync_subdirectory_path(hosted_file_id, hosted_file_dict, file_info)
                log.debug(hosted_file_dict)
            else:
                if file_info['already_exists']:
                    # NOTE: SOMETHING WENT WRONG
                    # Going to try and create a new host_file entry...
                    log.warning('For some reason a host_file object entry with the has was not found.')
                else:
                    # This is normal since the file was not found on the host server and not found in the DB.
                    log.warning('This is sort of normal. The file may have been deleted from the host server...')
                hosted_file_id, hosted_file_dict = _create_hosted_file_record(file_info)
                log.debug(hosted_file_sel_result)
        else:
            # The file was not and is not saved on the file server
            file_info['id_random'] = None
            hosted_file_id = None
            hosted_file_dict = _hosted_file_dict_without_ids(Hosted_File_Base(**file_info))

        _copy_upload_status(hosted_file_dict, file_info)
        log.debug(hosted_file_dict)
        hosted_file_list.append(hosted_file_dict)

        if hosted_file_id is None:
            # The file was not saved or no record could be created: there is nothing to link.
            log.warning('No hosted_file record is available for this file; skipping the hosted_file_link.')
            continue

        # NOTE: Currently sql_insert() does not handle all successful inserts correctly. If there is not an autonum ID then it will return 0 as the ID.
        _link_hosted_file(account_id, hosted_file_id, link_to_type, link_to_id)

    log.debug(hosted_file_list)
    return mk_resp(data=hosted_file_list, response=response)
# ### END ### API Hosted File Route ### upload_files_fake() ###


@router.post('/test_uploads')
async def test_upload_files(
    file_list: List[UploadFile],
    # account_id: str = Form(..., min_length=1, max_length=22),
    # filename: Optional[str] = Form(...),
    response: Response = Response,
):
    """Test endpoint: log the received uploads and answer 501 Not Implemented.

    The earlier body called ``save_file`` with account/link variables that are
    not defined in this function, so every upload failed with ``NameError``.
    Until this endpoint takes those values as parameters it only logs each
    upload's metadata.

    Args:
        file_list: Uploaded files.
        response: Response object handed to ``mk_resp``.

    Returns:
        ``mk_resp`` payload with ``data=False`` and status code 501.
    """
    log.setLevel(logging.DEBUG)  # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL
    log.debug(locals())

    # TODO: accept account_id / link_to_type / link_to_id as form fields and call save_file().
    for file_obj in file_list:
        log.debug(f'Received upload: {file_obj.filename} ({file_obj.content_type})')

    return mk_resp(data=False, status_code=501, response=response)


# ### BEGIN ### API Hosted File ### delete_hosted_file() ### # Updated 2022-08-09 @router.delete('/{hosted_file_id}', response_model=Resp_Body_Base) async def delete_hosted_file( hosted_file_id: str = Query(..., min_length=11, max_length=22), link_to_type: str = None, link_to_id: Union[int, str] = None, rm_orphan: bool = False, commons: Common_Route_Params = Depends(common_route_params), ): log.setLevel(logging.DEBUG) # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL log.debug(locals()) # ### SECTION ###
Secondary data validation if hosted_file_id := redis_lookup_id_random(record_id_random=hosted_file_id, table_name='hosted_file'): pass else: return mk_resp(data=None, status_code=404, response=commons.response, status_message='The hosted_file ID was invalid or not found.') # ### SECTION ### Handle the deletion of records and file if hosted_file_delete_result := handle_delete_hosted_file(account_id=commons.x_account_id, hosted_file_id=hosted_file_id, link_to_type=link_to_type, link_to_id=link_to_id, rm_orphan=rm_orphan): return mk_resp(data=True, response=commons.response, status_message='The hosted file link was deleted. Not an orphan file.') elif hosted_file_delete_result is None: log.warning(f'The file and or hosted file record may have already been deleted. Hosted File ID: {hosted_file_id}') return mk_resp(data=None, status_code=404, response=commons.response, status_message='The file and or hosted file record may have already been deleted.') # Not Found (maybe sort of...) else: log.error(f'Something may have gone wrong while trying to delete the hosted file from the server or the hosted_file record.') return mk_resp(data=False, status_code=400, response=commons.response, status_message='Something may have gone wrong while trying to delete the hosted file from the server or the hosted_file record.') # Bad Request # ### END ### API Hosted File ### download_hosted_file() ### # ### BEGIN ### API Hosted File ### get_hosted_file_obj() ### # Updated 2021-09-07 @router.get('/{hosted_file_id}', response_model=Resp_Body_Base) async def get_hosted_file_obj( hosted_file_id: str = Query(..., min_length=11, max_length=22), enabled: str = 'enabled', # enabled, disabled, all; For now this covers any included objects or object lists x_account_id: str = Header(...), by_alias: Optional[bool] = True, exclude_unset: Optional[bool] = True, response: Response = Response, ): log.setLevel(logging.INFO) # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL log.debug(locals()) if 
hosted_file_id := redis_lookup_id_random(record_id_random=hosted_file_id, table_name='hosted_file'): pass else: return mk_resp(data=None, status_code=404, response=response) if hosted_file_obj := load_hosted_file_obj( hosted_file_id = hosted_file_id, enabled = enabled, ): hosted_file_dict = hosted_file_obj.dict(by_alias=by_alias, exclude_unset=exclude_unset) pass else: return mk_resp(data=False, status_code=400, response=response) # Bad Request return mk_resp(data=hosted_file_dict, response=response) #return mk_resp(data=hosted_file_obj) # ### END ### API Hosted File ### get_hosted_file_obj() ### # ### BEGIN ### API Hosted File ### download_tmp() ### # Updated 2023-04-05 @router.get('/tmp/{subdirectory}/{filename}/download', response_model=Resp_Body_Base) async def download_tmp( subdirectory: str = Query(..., min_length=1, max_length=100), filename: str = Query(..., min_length=4, max_length=120), commons: Common_Route_Params = Depends(common_route_params), ): log.setLevel(logging.DEBUG) # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL log.debug(locals()) # NOTE: WARNING NOTE: WARNING NOTE: WARNING NOTE: WARNING NOTE: WARNING NOTE: WARNING NOTE: WARNING time.sleep(3.5) # NOTE: WARNING NOTE: WARNING NOTE: WARNING NOTE: WARNING NOTE: WARNING NOTE: WARNING # NOTE: WARNING NOTE: WARNING NOTE: WARNING NOTE: WARNING NOTE: WARNING NOTE: WARNING NOTE: WARNING hosted_tmp_path = settings.FILES_PATH['hosted_tmp_root'] log.info(f'Hosted Tmp Path: {hosted_tmp_path}') log.debug(shutil.disk_usage(hosted_tmp_path)) hosted_tmp_w_subdir = os.path.join(hosted_tmp_path, subdirectory) # if pathlib.Path(hosted_tmp_w_subdir): if os.path.exists(hosted_tmp_w_subdir): log.info('Hosted tmp with subdirectory found') else: log.info('Hosted tmp with subdirectory not found') return mk_resp(data=False, status_code=404, response=commons.response, status_message='The hosted tmp file subdirectory was not found.') # Not Found hosted_tmp_w_subdir_filename = os.path.join(hosted_tmp_path, subdirectory, 
filename) # if pathlib.Path(hosted_tmp_w_subdir_filename): if os.path.exists(hosted_tmp_w_subdir_filename): log.info('Hosted tmp with subdirectory and filename found') else: log.info('Hosted tmp with subdirectory and filename not found') return mk_resp(data=False, status_code=404, response=commons.response, status_message='The hosted tmp file was not found.') # Not Found return FileResponse(hosted_tmp_w_subdir_filename, filename=filename) # ### END ### API Hosted File ### download_tmp() ### # ### BEGIN ### API Hosted File Route ### convert_file() ### # This just needs to return the correct model for a new hosted_file # Updated 2023-04-04 @router.get('/{hosted_file_id}/convert_file') async def convert_file( hosted_file_id: str = Query(..., min_length=11, max_length=22), link_to_type: str = Query(..., min_length=2, max_length=50), link_to_id: str = Query(..., min_length=11, max_length=22), # filename: str = Query('automatic_pdf_to_img_conversion.webp', min_length=2, max_length=150), filename_no_ext: str = Query('automated_hosted_file_conversion', min_length=1, max_length=150), # extension: str = Query('webp', min_length=1, max_length=15), from_type: str = 'pdf', to_type: str = 'webp', pdf_opt1: bool = False, pdf_opt2: str = 'test', commons: Common_Route_Params = Depends(common_route_params), ): log.setLevel(logging.INFO) # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL log.debug(locals()) account_id = commons.x_account_id # OSIT _XY7DXtc9MY (1) account_id_random = commons.x_account_id_random # example event_presenter: B3d8eILlQjI (3616) link_to_id_random = link_to_id # This is for the object random str ID if link_to_id := redis_lookup_id_random(record_id_random=link_to_id, table_name=link_to_type): pass else: return mk_resp(data=None, status_code=400, response=commons.response) # Need to look up file_hash for hosted_file_id hosted_file_obj = load_hosted_file_obj(hosted_file_id=hosted_file_id) file_hash = hosted_file_obj.hash_sha256 # file_hash = 
'0080f0b03144927c173694745483894a09208d9444fdaccab054493f699361be' # file_hash = '279312d1738fd3a8a2f136b48295e28664d38b18de66c55de56b8886b9454784' # G1rTLpGbzhs (5046) file_hash_filename = f'{file_hash}.file' hosted_files_path = settings.FILES_PATH['hosted_files_root'] log.info(f'Hosted Files Path: {hosted_files_path}') log.debug(shutil.disk_usage(hosted_files_path)) file_subdirectory = file_hash[0:2] full_file_path = os.path.join(hosted_files_path, file_subdirectory, file_hash_filename) log.info(f'File Hash with Subdirectory: {full_file_path}') hosted_tmp_path = settings.FILES_PATH['hosted_tmp_root'] log.info(f'Hosted Tmp Path: {hosted_tmp_path}') log.debug(shutil.disk_usage(hosted_tmp_path)) hosted_tmp_convert_file_path = os.path.join(hosted_tmp_path, 'convert_file') if pathlib.Path(hosted_tmp_convert_file_path): log.info('Hosted tmp convert file path found') else: log.info('Creating hosted tmp convert file path') pathlib.Path(hosted_tmp_convert_file_path).mkdir(parents=True, exist_ok=True) # 8K 8192x4320 # UHD 8K 7680x4320 # 4K 4096x2160 # UHD 4K 3840x2160 # 2K 2048x1080 # HD 1920x1080 # Save as webp with 3840 size and 90 lossy quality works well for posters. Better than in the past with PNG. Higher resolution and smaller file size! -2023-05-04 images = convert_from_path(full_file_path, size=(3840, None)) # 2160 works well for image in images: # *** Part 1: *** Convert the file and save the file to tmp and then save the hashed file to hosted_files directory. 
if to_type == 'webp': save_path = os.path.join(hosted_tmp_convert_file_path, 'converted_3840px_lossy_90q.webp') # save_path = os.path.join(hosted_tmp_convert_file_path, 'converted_3840px_lossless_100q.webp') # Lossy WebP takes about 25% of the time as WebP lossless compression with 100 level effort # .46 seconds vs 2.1 seconds with example PDF # image.save('testing_2625px_80q.webp', quality=80) # default # timer_2a_start = timer() image.save(save_path, lossless=False, quality=90) # default quality is 80 # timer_2a_end = timer() # print( round((timer_2a_end - timer_2a_start), 8) ) elif to_type == 'png': save_path = os.path.join(hosted_tmp_convert_file_path, 'converted_3840px_lossless_9.png') image.save(save_path, compress_level=9) else: return False # timer_2b_start = timer() # image.save('testing_2160px_lossless_100q.webp', lossless=True, quality=100) # quality is level of effort # timer_2b_end = timer() # print( round((timer_2b_end - timer_2b_start), 8) ) # file_info = await save_file( # file = file_obj, # account_id = account_id, # account_id_random = account_id_random, # link_to_type = link_to_type, # link_to_id = link_to_id, # link_to_id_random = link_to_id_random, # check_allowed_extension = False, # ) # if file_info['saved']: pass # *** Part 2: *** Save the converted hashed file to hosted_files directory. 
file_info = await save_file_to_hosted_file( file_path = save_path, filename = f'{filename_no_ext}.{to_type}', extension = to_type, account_id = account_id, link_to_type = link_to_type, link_to_id = link_to_id, ) # *** Part 3: *** Save information to database in hosted_file table (hosted_file_link table will be updated by an event_file table trigger) if file_info.get('saved'): # NOTE: Just in case look up in DB based on hash log.info('Look up in DB based on hash...') if hosted_file_sel_result := sql_select( table_name = 'hosted_file', field_name = 'hash_sha256', field_value = file_info['hash_sha256'], ): log.warning('Found an existing host_file object_entry in the DB but the file was not found on the server!') # Got existing host_file object_entry! # Odd... the hash was found in the database, but the file had to be copied again. # If this happens then the file on the host server was probably deleted at some point. hosted_file_id = hosted_file_sel_result.get('id', None) hosted_file_id_random = hosted_file_sel_result.get('id_random', None) hosted_file_dict = load_hosted_file_obj(hosted_file_id=hosted_file_id, model_as_dict=True) else: # This is normal since the file was not found on the host server and not found in the DB. # Create a new host_file object entry and new host_file.id_random. 
file_info['account_id'] = account_id # file_info['account_id_random'] = account_id_random hosted_file_obj = Hosted_File_Base(**file_info) if hosted_file_obj_result := create_hosted_file_obj(hosted_file_obj_new=hosted_file_obj): hosted_file_id = hosted_file_obj_result hosted_file_dict = load_hosted_file_obj(hosted_file_id=hosted_file_id, model_as_dict=True) else: log.warning('For some reason a host_file object entry could not be created.') hosted_file_id = None hosted_file_dict = hosted_file_obj.dict(by_alias=True, exclude_unset=True, exclude={'id', 'id_random'}) # pylint: disable=no-member log.debug(hosted_file_obj_result) log.debug(hosted_file_sel_result) else: return False log.debug(hosted_file_dict) return mk_resp(data=hosted_file_dict, response=commons.response) # *** Part 4: *** Save information to database in event_file (will trigger an update to hosted_file_link) # event_file_data = {} # event_file_data['hosted_file_id'] = hosted_file_id # # event_file_data['hosted_file_id_random'] = hosted_file_id_random # event_file_data['for_type'] = link_to_type # event_file_data['for_id'] = link_to_id # if event_id: # event_file_data['event_id'] = event_id # if event_location_id: # event_file_data['event_location_id'] = event_location_id # if event_presentation_id: # event_file_data['event_presentation_id'] = event_presentation_id # if event_presenter_id: # event_file_data['event_presenter_id'] = event_presenter_id # if event_session_id: # event_file_data['event_session_id'] = event_session_id # if event_track_id: # event_file_data['event_track_id'] = event_track_id # event_file_data['filename'] = file_info.get('filename') # event_file_data['extension'] = file_info.get('extension') # event_file_data['enable'] = True # hosted_file_obj.enable # # log.setLevel(logging.DEBUG) # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL # log.debug(event_file_data) # try: # event_file_obj = Event_File_Base(**event_file_data) # except ValidationError as e: # log.error(e.json()) # 
return False # log.debug(event_file_obj) # create_event_file_obj_result = create_event_file_obj(event_file_obj_new=event_file_obj) # log.debug(create_event_file_obj_result) # return file_info