Files
OSIT-AE-API-FastAPI/app/routers/hosted_file.py
2021-06-14 17:57:15 -04:00

363 lines
16 KiB
Python

from __future__ import annotations

import datetime
import hashlib
import os
import pathlib
import shutil
import time
#from datetime import datetime, time, timedelta
from typing import BinaryIO, Dict, List, Optional, Set, Union

from fastapi import APIRouter, Body, Depends, File, Form, Header, HTTPException, Query, Response, status, UploadFile
from pydantic import BaseModel, EmailStr, Field, ValidationError

from app.config import settings
from app.db_sql import sql_insert, sql_update, sql_insert_or_update, sql_select, sql_delete, redis_lookup_id_random
from app.lib_general import log, logging
# from .api_crud import delete_obj_template, get_obj_template, get_obj_li_template, patch_obj_template, post_obj_template
from app.models.hosted_file_models import Hosted_File_Base
from app.models.response_models import mk_resp
# Router on which the hosted-file endpoints in this module are registered.
router = APIRouter()
# ### BEGIN ### API Hosted File Route ### upload_files() ###
# This just needs to return the correct model for a hosted_file
# Everything else seems to be working well
# Should this also do something with meta data and updating the DB?
def _hosted_file_dict_from_info(file_info: dict) -> dict:
    """Build a response dict directly from a save_file() result.

    Used whenever no hosted_file DB record can be loaded for the upload.
    The 'id' and 'id_random' fields are always left out of the result.
    """
    hosted_file_obj = Hosted_File_Base(**file_info)
    return hosted_file_obj.dict(by_alias=True, exclude_unset=True, exclude={'id', 'id_random'})  # pylint: disable=no-member


def _hosted_file_dict_for_saved_file(file_info: dict) -> dict:
    """Return the hosted_file dict for an upload that save_file() stored.

    Looks the file up in the 'hosted_file' table by its SHA-256 hash and
    creates a new entry when none is found. If the entry cannot be created
    or loaded, the dict is built from *file_info* instead, so the caller
    always receives a dict.
    """
    hosted_file_id = None
    if hosted_file_sel_result := sql_select(
        table_name='hosted_file',
        field_name='hash_sha256',
        field_value=file_info['hash_sha256'],
    ):
        log.debug(hosted_file_sel_result)
        if not file_info['already_exists']:
            # Odd... the hash was found in the database, but the file had to be copied again.
            # If this happens then the file on the host server was probably deleted at some point.
            log.warning('Found an existing host_file object_entry in the DB but the file was not found on the server!')
        hosted_file_id = hosted_file_sel_result.get('id_random', None)
    else:
        if file_info['already_exists']:
            # The file is on disk but the DB has no entry for it: SOMETHING WENT WRONG.
            # Going to try and create a new host_file entry...
            log.warning('For some reason a host_file object entry with the has was not found.')
        # Otherwise this is the normal case: not on the host server and not in the DB.
        hosted_file_obj = Hosted_File_Base(**file_info)
        hosted_file_obj_result = create_hosted_file_obj(hosted_file_obj_new=hosted_file_obj)
        log.debug(hosted_file_obj_result)
        if hosted_file_obj_result:
            hosted_file_id = hosted_file_obj_result
        else:
            log.warning('For some reason a host_file object entry could not be created.')

    if hosted_file_id:
        # load_hosted_file_obj() returns False on failure; never hand that back as the dict.
        if hosted_file_dict := load_hosted_file_obj(hosted_file_id=hosted_file_id, model_as_dict=True):
            return hosted_file_dict
        log.warning('The host_file object entry could not be loaded; returning the upload info instead.')
    return _hosted_file_dict_from_info(file_info)


@router.post('/upload_files/')
async def upload_files(
    file_list: List[UploadFile] = File(...),
    account_id: str = Form(..., min_length=1, max_length=22),
    # filename: Optional[str] = Form(...),
    for_object_type: str = Form(...),
    for_object_id: str = Form(..., min_length=1, max_length=22),
    check_allowed_extension: bool = False,
    x_account_id: str = Header(..., ),
    return_obj: bool = True,
    by_alias: bool = True,
    exclude_unset: bool = True,
):
    """Store every uploaded file and report one hosted_file dict per file.

    The random string IDs in *account_id* and *for_object_id* are resolved
    with redis_lookup_id_random(); if either lookup returns a falsy value the
    request is answered with ``mk_resp(data=None, status_code=400)``.

    Each file is passed to save_file(). For a saved file the matching
    hosted_file entry is looked up by hash (or created). Every result dict
    also carries the 'extension_allowed', 'already_exists', 'saved' and
    'copy_timer' values reported by save_file().

    *x_account_id*, *return_obj*, *by_alias* and *exclude_unset* are accepted
    but not used by this function.
    """
    log.setLevel(logging.WARNING)  # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL
    log.debug(locals())

    account_id_random = account_id  # This is for the account random str ID
    account_id = redis_lookup_id_random(record_id_random=account_id_random, table_name='account')
    if not account_id:
        return mk_resp(data=None, status_code=400)

    for_object_id_random = for_object_id  # This is for the object random str ID
    for_object_id = redis_lookup_id_random(record_id_random=for_object_id_random, table_name=for_object_type)
    if not for_object_id:
        return mk_resp(data=None, status_code=400)

    hosted_file_list = []
    for file_obj in file_list:
        file_info = await save_file(
            file=file_obj,
            account_id=account_id,
            account_id_random=account_id_random,
            for_object_type=for_object_type,
            for_object_id=for_object_id,
            for_object_id_random=for_object_id_random,
            check_allowed_extension=check_allowed_extension,
        )
        if file_info['saved']:
            hosted_file_dict = _hosted_file_dict_for_saved_file(file_info)
        else:
            file_info['id_random'] = None
            hosted_file_dict = _hosted_file_dict_from_info(file_info)

        # .get() so that a file_info lacking one of these keys reports None
        # for it instead of aborting the whole request with a KeyError.
        for key in ('extension_allowed', 'already_exists', 'saved', 'copy_timer'):
            hosted_file_dict[key] = file_info.get(key)
        hosted_file_list.append(hosted_file_dict)

    log.debug(hosted_file_list)
    return mk_resp(data=hosted_file_list)
# ### END ### API Hosted File Route ### upload_files() ###
# ### BEGIN ### API Hosted File Route ### save_file() ###
async def save_file(
    file: UploadFile,
    account_id: int,
    account_id_random: str,
    for_object_type: str,
    for_object_id: int,
    for_object_id_random: str,
    check_allowed_extension: bool = False,
    hosted_file_path: str = '/home/scott/tmp/hosted_file_dev/',
):
    """Copy one uploaded file into *hosted_file_path*, named by its SHA-256.

    The destination is ``<hosted_file_path>/<sha256>.file``. If that file
    already exists nothing is copied.

    Returns a ``file_info`` dict with the keys 'saved', 'for_object_type',
    'for_object_id', 'for_object_id_random', 'filename', 'extension',
    'extension_allowed', 'already_exists' and 'copy_timer'. Unless the
    extension was rejected it also has 'content_type', 'size' and
    'hash_sha256'.

    'extension_allowed' is None when *check_allowed_extension* is False.
    'saved' is False when the extension was rejected or the copy failed.

    *account_id* and *account_id_random* are accepted but only appear in the
    debug log.
    """
    # log.setLevel(logging.WARNING) # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL
    log.debug(locals())
    log.debug(shutil.disk_usage(hosted_file_path))
    log.debug(dir(file))
    log.debug(f'{file.filename}')

    file_info = {}
    file_info['saved'] = None
    file_info['for_object_type'] = for_object_type
    file_info['for_object_id'] = for_object_id
    file_info['for_object_id_random'] = for_object_id_random
    file_info['filename'] = file.filename
    file_info['extension'] = guess_file_extension(filename=file.filename)

    if check_allowed_extension:
        # The helper must be *called*: the bare function name is always truthy.
        if allowed_file_extension(file_info['extension']):
            file_info['extension_allowed'] = True
        else:
            file_info['extension_allowed'] = False
            # Fill in the keys the success path sets so callers can read them.
            file_info['already_exists'] = False
            file_info['copy_timer'] = 0
            file_info['saved'] = False
            return file_info
    else:
        file_info['extension_allowed'] = None

    # There is a difference between Content-Type and MIME type.
    # https://stackoverflow.com/questions/3452381/whats-the-difference-of-contenttype-and-mimetype
    file_info['content_type'] = file.content_type  # might also include charset or other parameters
    # file_info['mimetype'] = file.mimetype # This may need to be filled in a different way?

    file.file.seek(0, os.SEEK_END)
    file_size = file.file.tell()
    file.file.seek(0)  # The file will not properly save if seek is not reset to 0.
    log.debug(file_size)
    file_info['size'] = file_size

    file_hash = await get_file_object_hash(file.file)
    log.debug(file_hash)
    file_info['hash_sha256'] = file_hash

    # 16384 bytes is the default
    # 4096 8192 16384 32768 65536 131072 262144 524288 1048576 bytes
    buffer_size = 524288
    f_src = file.file  # Don't need to do open(file_src, 'rb') since it is already "open"
    file_dest = pathlib.Path(hosted_file_path) / f'{file_hash}.file'

    if file_dest.exists():
        file_info['already_exists'] = True
        file_info['copy_timer'] = 0
        file_info['saved'] = True
    else:
        file_info['already_exists'] = False
        try:
            # 'with' guarantees the destination is flushed and closed.
            with open(file_dest, 'wb') as f_dest:
                # NOTE: process_time() counts CPU time only, not time spent waiting on I/O.
                timer_start = time.process_time()
                shutil.copyfileobj(f_src, f_dest, buffer_size)
                timer_end = time.process_time()
            elapsed_time = timer_end - timer_start
            log.debug(f'Elapsed time: {elapsed_time}')
            file_info['copy_timer'] = elapsed_time
            file_info['saved'] = True
        except Exception:
            # Best effort: report the failure to the caller instead of raising.
            log.exception('*** An exception happened. ***')
            # Remove a partial copy; otherwise a later upload of the same
            # content would find it and be reported as "already exists".
            try:
                file_dest.unlink(missing_ok=True)
            except OSError:
                log.exception('Could not remove the partially written file.')
            file_info['copy_timer'] = 0
            file_info['saved'] = False

    log.debug(shutil.disk_usage(hosted_file_path))
    return file_info
# ### END ### API Hosted File Route ### save_file() ###
# ### BEGIN ### API Hosted File Route ### get_file_object_hash() ###
async def get_file_object_hash(file_object: BinaryIO):
    """Return the SHA-256 hex digest of the whole content of *file_object*.

    The stream is rewound before hashing, so the digest always covers the
    complete content regardless of where the caller left the position. It is
    rewound again afterwards so the object can be read from the start.
    *file_object* must be a seekable binary stream.
    """
    #log.setLevel(logging.WARNING) # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL
    log.debug(locals())

    # 4096 bytes is the current block size on my workstation and Linode server
    # 4096 8192 16384 32768 65536 131072 262144 524288 1048576 bytes
    block_size = 131072
    hash_value = hashlib.sha256()

    timer_start = time.process_time()
    file_object.seek(0)  # Hash the whole content, not just what follows the current position.
    while chunk := file_object.read(block_size):
        hash_value.update(chunk)
    file_hash = hash_value.hexdigest()
    file_object.seek(0)  # The file will not properly save if seek is not reset to 0.
    timer_end = time.process_time()

    elapsed_time = timer_end - timer_start
    log.debug(f'Elapsed time: {elapsed_time}')
    return file_hash
# ### END ### API Hosted File Route ### get_file_object_hash() ###
# ### BEGIN ### API Hosted File Route ### guess_file_extension() ###
def guess_file_extension(filename:str):
    """Return the lower-cased text after the last '.' in *filename*.

    Returns an empty string when *filename* is empty/None or contains no
    '.', instead of raising IndexError.
    """
    if not filename:
        return ''
    _, dot, extension = filename.rpartition('.')
    # rpartition() leaves the separator empty when there is no '.' at all.
    return extension.lower() if dot else ''
# ### END ### API Hosted File Route ### guess_file_extension() ###
# ### BEGIN ### API Hosted File Route ### allowed_file_extension() ###
def allowed_file_extension(extension:str, allowed_extensions=None):
    """Return True when *extension* (case-insensitive) is in the allow-list.

    *allowed_extensions* is an optional container of lower-case extensions.
    When it is omitted the list is read from the application settings.
    A None extension is never allowed.
    """
    if allowed_extensions is None:
        # The previous lookup went through `app.config[...]`, but no name
        # `app` is bound in this module, so it raised NameError.
        # NOTE(review): assumes the settings object exposes the list as
        # ALLOWED_EXTENSIONS - confirm the attribute name. When it is
        # missing, every extension is rejected (fail closed).
        allowed_extensions = getattr(settings, 'ALLOWED_EXTENSIONS', ())
    if extension is None:
        return False
    return extension.lower() in allowed_extensions
# ### END ### API Hosted File Route ### allowed_file_extension() ###
# ### BEGIN ### API Hosted File Route ### hosted_file_link() ###
async def hosted_file_link(
    account_id: int,
    hosted_file_id: str,
    for_object_type: str,
    for_object_id: int,
    for_object_id_random: str,
):
    """Assemble a 'hosted_file_link' row for a hosted file and insert it.

    Returns False when a duplicate hosted_file is reported but the existing
    record cannot be selected; otherwise falls off the end (returns None).

    NOTE(review): `response`, `data`, `select_record` and
    `sql_insert_for_rest` are not defined or imported in the part of this
    module visible to review, so this function presumably raises NameError
    at the first `response[...]` access - confirm whether it is still used.
    *for_object_id_random* is accepted but only appears in the debug log.
    """
    # Sets the logger to DEBUG for this call (the other functions use WARNING).
    log.setLevel(logging.DEBUG) # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL
    log.debug(locals())
    # Column values for the new hosted_file_link row.
    hosted_file_link_data = {}
    hosted_file_link_data['account_id'] = account_id
    hosted_file_link_data['hosted_file_id'] = hosted_file_id
    hosted_file_link_data['object_type'] = for_object_type
    hosted_file_link_data['object_id'] = for_object_id
    # NOTE(review): `response` is never assigned in this function; presumably
    # it was the result of a preceding hosted_file insert - confirm.
    if response['data']['id'] == True:
        #print('Tried to insert a new hosted_file record, but there is a duplicate.')
        # There was likely a record with the same hash value.
        table_name = 'hosted_file'
        field_name = 'hash_sha256'
        # NOTE(review): `data` is not a parameter or local of this function.
        field_value = data['hash_sha256']
        select_hosted_file_response = select_record(table_name=table_name, field_name=field_name, field_value=field_value)
        if select_hosted_file_response:
            # Point the link (and the response) at the already existing record.
            hosted_file_link_data['hosted_file_id'] = select_hosted_file_response['id']
            response['data']['id'] = select_hosted_file_response['id']
            response['data']['id_random'] = select_hosted_file_response['id_random']
        else:
            return False
    else:
        print('Inserted new host_file record.')
        pass
    table_name = 'hosted_file_link'
    # The insert result is stored but neither checked nor returned.
    hosted_file_link_response = sql_insert_for_rest(data=hosted_file_link_data, table_name=table_name, sql=None, model=None, resource_ref=True)
# ### BEGIN ### API Hosted File Methods ### create_hosted_file_obj() ###
def create_hosted_file_obj(hosted_file_obj_new:Hosted_File_Base):
    """Insert *hosted_file_obj_new* into the 'hosted_file' table.

    Only the fields explicitly set on the model are inserted, and
    'created_on' / 'updated_on' are always left out.

    Returns the truthy result of sql_insert() (used by callers as the new
    hosted_file id), or False when the insert result is falsy.
    """
    log.setLevel(logging.WARNING)  # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL
    log.debug(locals())

    insert_payload = hosted_file_obj_new.dict(
        by_alias=False,
        exclude_defaults=False,
        exclude_unset=True,
        exclude={'created_on', 'updated_on'},
    )
    new_hosted_file_id = sql_insert(
        data=insert_payload,
        table_name='hosted_file',
        rm_id_random=True,
        id_random_length=8,
    )
    # Guard clause: nothing was inserted.
    if not new_hosted_file_id:
        return False

    log.debug(new_hosted_file_id)
    log.debug(f'Returning the new hosted_file_id: {new_hosted_file_id}')
    return new_hosted_file_id
# ### END ### API Hosted File Methods ### create_hosted_file_obj() ###
# ### BEGIN ### API Hosted File Methods ### load_hosted_file_obj() ###
def load_hosted_file_obj(
    hosted_file_id: int|str,
    limit: int = 1000,
    model_as_dict: bool = False,
    enabled: str = 'enabled', # enabled, disabled, all
    # inc_x: bool = False,
) -> Hosted_File_Base|dict|bool:
    """Load one hosted_file record and return it as a model or a dict.

    *hosted_file_id* is first resolved with redis_lookup_id_random(), then
    the record is selected from the 'hosted_file' table and validated
    through Hosted_File_Base.

    Returns the model, or with *model_as_dict* its
    ``dict(by_alias=True, exclude_unset=True)`` form.
    Returns False when the id cannot be resolved, the record is not found,
    or the record fails model validation (the validation error is logged).

    *limit* and *enabled* are accepted but not used by this function.
    """
    log.setLevel(logging.WARNING)  # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL
    log.debug(locals())

    hosted_file_id = redis_lookup_id_random(record_id_random=hosted_file_id, table_name='hosted_file')
    if not hosted_file_id:
        return False

    hosted_file_rec = sql_select(table_name='hosted_file', record_id=hosted_file_id)
    if not hosted_file_rec:
        return False
    #log.setLevel(logging.DEBUG) # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL
    log.debug(hosted_file_rec)

    try:
        hosted_file_obj = Hosted_File_Base(**hosted_file_rec)
    except ValidationError as e:
        # ValidationError has to be imported from pydantic for this handler to
        # work; without it a bad record surfaced as NameError instead.
        log.error(e.json())
        return False
    log.debug(hosted_file_obj)

    # if inc_x:
    #     x_id = hosted_file_rec.get('x_id', None)
    #     if x_obj_result := load_x_obj(x_id=x_id):
    #         x_obj = x_obj_result
    #         hosted_file_obj.x = x_obj
    #     else: hosted_file_obj.x = None

    if model_as_dict:
        return hosted_file_obj.dict(by_alias=True, exclude_unset=True)  # pylint: disable=no-member
    return hosted_file_obj
# ### END ### API Hosted File Methods ### load_hosted_file_obj() ###