Prep for AAPOR with Confex and NCSD STD. General clean up.

This commit is contained in:
Scott Idem
2023-05-02 09:50:24 -04:00
parent c687ade3fa
commit c1f68522aa
6 changed files with 519 additions and 96 deletions

View File

@@ -1,11 +1,18 @@
import datetime, json, pprint, pytz, random, requests, string, time
import datetime, json, os, pprint, pytz, random, requests, shutil, string, time
from typing import Dict, List, Optional, Set, Union
from pydantic import BaseModel, EmailStr, Field, PrivateAttr, ValidationError, validator
from app.config import settings
from app.db_sql import redis_lookup_id_random, sql_insert, sql_select, sql_update
from app.lib_general import log, logging, logger_reset
from app.methods.event_file_methods import create_event_file_obj
from app.methods.hosted_file_methods import create_hosted_file_obj, load_hosted_file_obj, save_file, save_file_to_hosted_file
from app.models.hosted_file_models import Hosted_File_Base
from app.models.event_file_models import Event_File_Base
api = {}
# api['base_url'] = 'https://aapor.confex.com/aapor/2023/meetingapi.cgi/[object]/[id]'
@@ -20,7 +27,7 @@ api['password'] = None
@logger_reset
def get_event_session_list(
):
log.setLevel(logging.INFO) # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL
log.setLevel(logging.WARNING) # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL
log.debug(locals())
# if result := authenticate():
@@ -89,7 +96,7 @@ def get_event_session_list(
def get_event_session_detail(
confex_session_id: str, # actually an auto number
):
log.setLevel(logging.INFO) # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL
log.setLevel(logging.WARNING) # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL
log.debug(locals())
# if result := authenticate():
@@ -231,7 +238,7 @@ def get_event_presenter_detail(
confex_presentation_id: str, # similar to 'Paper/99999'
confex_presenter_id: str, # similar to 'Person/99999'
):
log.setLevel(logging.INFO) # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL
log.setLevel(logging.WARNING) # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL
log.debug(locals())
# if result := authenticate():
@@ -281,7 +288,7 @@ def get_event_presenter_detail(
try_request = True
confex_presenter_detail = False
else:
log.info('Not trying again')
log.warning('Not trying again')
try_request = False
confex_presenter_detail = False
@@ -301,14 +308,9 @@ def get_event_presenter_detail(
def get_event_file_detail(
confex_file_id: str, # similar to 'FileMap/Paper1928_Presentation2'
):
log.setLevel(logging.INFO) # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL
log.setLevel(logging.WARNING) # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL
log.debug(locals())
# if result := authenticate():
# log.debug(result)
# else:
# return False
endpoint = f'/{confex_file_id}'
uri = api['base_url']+endpoint
params = {}
@@ -325,16 +327,12 @@ def get_event_file_detail(
log.debug(f'Status Code: {resp.status_code}')
log.debug(f'Headers: {resp.headers}')
# log.debug(f'Encoding: {resp.encoding}')
# log.debug('Text:')
# log.debug(resp.text)
# log.debug(resp.json())
if resp.status_code == 200:
log.info('Status 200')
log.debug(resp.json())
confex_file_detail_raw = resp.json() # .get('data').get('dataList')[0]
confex_file_detail_raw = resp.json()
# log.debug(confex_file_detail_raw)
confex_file_detail = confex_file_detail_raw
@@ -357,8 +355,204 @@ def get_event_file_detail(
log.warning('Something may have gone wrong during the request.')
# log.warning('Something may have gone wrong. Setting the API app_user_token_datetime value to None to re-authenticate with Impexium on the next request.')
# api['app_user_token_datetime'] = None # Resetting this just in case the App and or User token expired.
return confex_file_detail
# ### END ### API External Confex Methods ### get_event_file_detail() ###
# ### END ### API External Confex Methods ### get_event_file_detail() ###
# ### BEGIN ### API External Confex Methods ### get_event_slot_data() ###
# Updated 2023-05-01
@logger_reset
def get_event_slot_data(
        confex_slot_id: str,  # similar to 'SlotData/Session1110_Slot119'
    ):
    """Fetch one slot record from the Confex meeting API.

    Sends a GET request to ``api['base_url'] + '/' + confex_slot_id`` using
    ``api['headers']``. A 429 (rate limit) response is retried after a short
    sleep, up to ``max_tries`` attempts in total.

    Args:
        confex_slot_id: Confex object path, e.g. 'SlotData/Session1110_Slot119'.

    Returns:
        The decoded JSON body on status 200, ``None`` on status 404, and
        ``False`` when every attempt was rate limited or any other status
        code was returned.

    Raises:
        requests.RequestException: Propagated unchanged on network failure
            or when the request times out.
    """
    log.setLevel(logging.INFO)  # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL
    log.debug(locals())

    endpoint = f'/{confex_slot_id}'
    uri = api['base_url']+endpoint
    params = {}

    confex_slot_data = None
    max_tries = 5

    # range() makes the attempt budget exact: at most max_tries requests.
    for try_count in range(1, max_tries + 1):
        log.debug(f'Attempt {try_count} of {max_tries}')
        # A timeout keeps a stalled connection from blocking the caller forever.
        resp = requests.get(url=uri, params=params, headers=api['headers'], timeout=30)
        log.debug(f'Status Code: {resp.status_code}')
        log.debug(f'Headers: {resp.headers}')

        if resp.status_code == 200:
            log.info('Status 200')
            confex_slot_data = resp.json()  # Decode the body once and reuse it.
            log.debug(confex_slot_data)
            break

        if resp.status_code == 404:
            log.warning('No results returned (status 404)')
            confex_slot_data = None
            break

        if resp.status_code == 429:
            log.warning('Hit rate limit. Sleeping for .1 seconds...')
            time.sleep(.1)
            # Stays False if the attempt budget runs out while rate limited.
            confex_slot_data = False
            continue

        # Any other status code is treated as a hard failure.
        log.warning('Not trying again')
        log.warning('Something may have gone wrong during the request.')
        confex_slot_data = False
        break

    return confex_slot_data
# ### END ### API External Confex Methods ### get_event_slot_data() ###
# Updated 2023-05-01
def _download_confex_file(url: str, dest_path: str, chunk_size: int = 65536) -> bool:
    """Stream the body at *url* into *dest_path*; return False for any non-200 response."""
    # The sub-directory under the tmp root is not guaranteed to exist yet.
    os.makedirs(os.path.dirname(dest_path), exist_ok=True)
    # (connect, read) timeouts so a stalled server cannot block the caller forever.
    # The context manager releases the streamed connection back to the pool.
    with requests.get(url, stream=True, timeout=(10, 120)) as response:
        if response.status_code != 200:
            # Never store an error page as if it were the requested file.
            log.warning(f'File download failed with status code: {response.status_code}')
            return False
        with open(dest_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=chunk_size):
                if chunk:
                    f.write(chunk)
    return True


def _build_event_file_data(
        hosted_file_id,
        link_to_type: str,
        link_to_id: int,
        file_info: dict,
        optional_ids: dict,
    ) -> dict:
    """Assemble the field dict used to construct an Event_File_Base."""
    event_file_data = {
        'hosted_file_id': hosted_file_id,
        'for_type': link_to_type,
        'for_id': link_to_id,
    }
    # Only pass along the optional link IDs that were actually supplied.
    event_file_data.update({key: value for key, value in optional_ids.items() if value})
    event_file_data['filename'] = file_info.get('filename')
    event_file_data['extension'] = file_info.get('extension')
    event_file_data['enable'] = True
    return event_file_data


# Updated 2023-05-01
@logger_reset
async def get_event_file_save_local(
        url: str,
        confex_file_info: dict,
        account_id: int,
        link_to_type: str,
        link_to_id: int,
        event_id: int|None = None,
        event_location_id: int|None = None,
        event_presentation_id: int|None = None,
        event_presenter_id: int|None = None,
        event_session_id: int|None = None,
        event_track_id: int|None = None,
    ) -> None|bool|dict:
    """Download a file from *url* and register it as a hosted file and an event file.

    Steps:
      1. Stream the file into ``<hosted_tmp_root>/confex/confex_temp.file``.
      2. Hand that temp file to ``save_file_to_hosted_file()``.
      3. Look up the ``hosted_file`` row by SHA-256 hash, or create a new one.
      4. Create an ``event_file`` object that points at the hosted file.

    Args:
        url: Location of the file to download.
        confex_file_info: Dict whose 'filename' and 'extension' entries are
            passed on to ``save_file_to_hosted_file()``.
        account_id: Stored on a newly created hosted file entry.
        link_to_type: Stored as ``for_type`` on the event file.
        link_to_id: Stored as ``for_id`` on the event file.
        event_id, event_location_id, event_presentation_id,
        event_presenter_id, event_session_id, event_track_id:
            Optional IDs copied onto the event file when truthy.

    Returns:
        The ``file_info`` dict from ``save_file_to_hosted_file()`` on success.
        ``False`` when the download did not return status 200, the file could
        not be saved, or the event file data failed validation.

    Raises:
        requests.RequestException: Propagated on network failure or timeout.
    """
    log.setLevel(logging.INFO)  # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL
    log.debug(locals())

    # *** Part 1: *** First download the file to tmp.
    hosted_tmp_path = settings.FILES_PATH['hosted_tmp_root']
    log.info(f'Hosted Tmp Path: {hosted_tmp_path}')
    log.debug(shutil.disk_usage(hosted_tmp_path))

    # NOTE(review): every call shares this one temp filename; overlapping calls
    # could overwrite each other's download -- confirm calls are serialized.
    # NOTE(review): requests is synchronous, so the download blocks the event
    # loop for its whole duration.
    hosted_tmp_path_w_filename = os.path.join(hosted_tmp_path, 'confex', 'confex_temp.file')
    if not _download_confex_file(url, hosted_tmp_path_w_filename):
        return False

    # *** Part 2: *** Save the hashed file to hosted_files directory.
    file_info = await save_file_to_hosted_file(
        file_path = hosted_tmp_path_w_filename,
        filename = confex_file_info.get('filename'),
        extension = confex_file_info.get('extension'),
        account_id = account_id,
        link_to_type = link_to_type,
        link_to_id = link_to_id,
    )

    # save_file_to_hosted_file() returns False (not a dict) when the copy fails.
    if not file_info or not file_info.get('saved'):
        return False

    # *** Part 3: *** Save information to database in hosted_file table (hosted_file_link table will be updated by an event_file table trigger)
    # NOTE: Just in case look up in DB based on hash
    log.info('Look up in DB based on hash...')
    hosted_file_sel_result = sql_select(
        table_name = 'hosted_file',
        field_name = 'hash_sha256',
        field_value = file_info['hash_sha256'],
    )
    log.debug(hosted_file_sel_result)

    if hosted_file_sel_result:
        # The hash is already in the database; reuse that record.
        log.warning('Found an existing host_file object_entry in the DB but the file was not found on the server!')
        hosted_file_id = hosted_file_sel_result.get('id', None)
    else:
        # Not in the DB yet: create a new host_file object entry.
        file_info['account_id'] = account_id
        hosted_file_obj = Hosted_File_Base(**file_info)
        hosted_file_obj_result = create_hosted_file_obj(hosted_file_obj_new=hosted_file_obj)
        log.debug(hosted_file_obj_result)
        if hosted_file_obj_result:
            hosted_file_id = hosted_file_obj_result
        else:
            # NOTE(review): processing continues with hosted_file_id = None, as
            # before -- confirm whether this should abort instead.
            log.warning('For some reason a host_file object entry could not be created.')
            hosted_file_id = None

    # *** Part 4: *** Save information to database in event_file (will trigger an update to hosted_file_link)
    event_file_data = _build_event_file_data(
        hosted_file_id = hosted_file_id,
        link_to_type = link_to_type,
        link_to_id = link_to_id,
        file_info = file_info,
        optional_ids = {
            'event_id': event_id,
            'event_location_id': event_location_id,
            'event_presentation_id': event_presentation_id,
            'event_presenter_id': event_presenter_id,
            'event_session_id': event_session_id,
            'event_track_id': event_track_id,
        },
    )
    log.debug(event_file_data)

    try:
        event_file_obj = Event_File_Base(**event_file_data)
    except ValidationError as e:
        log.error(e.json())
        return False
    log.debug(event_file_obj)

    create_event_file_obj_result = create_event_file_obj(event_file_obj_new=event_file_obj)
    log.debug(create_event_file_obj_result)

    return file_info

View File

@@ -328,7 +328,7 @@ def get_event_session_rec_list(
approved: str = 'all', # approved, not_approved, all
hidden: str = 'not_hidden', # hidden, not_hidden, all
review: str = 'all', # ready, not_ready, all
limit: int = 100,
limit: int = 150,
offset: int = 0,
) -> list|bool:
log.setLevel(logging.INFO) # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL
@@ -441,7 +441,7 @@ def create_update_event_session_obj_v4(
fail_any: bool = False, # Fail if any thing goes wrong for sub objects
return_outline: bool = False,
) -> int|bool:
log.setLevel(logging.INFO) # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL
log.setLevel(logging.WARNING) # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL
log.debug(locals())
log.info('Checking requirements...')

View File

@@ -44,7 +44,7 @@ def load_hosted_file_obj(
enabled: str = 'enabled', # enabled, disabled, all
inc_hosted_file_link_list: bool = False,
) -> Hosted_File_Base|dict|bool:
log.setLevel(logging.WARNING) # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL
log.setLevel(logging.DEBUG) # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL
log.debug(locals())
if hosted_file_id := redis_lookup_id_random(record_id_random=hosted_file_id, table_name='hosted_file'): pass
@@ -116,7 +116,7 @@ def lookup_file_hash(
# ### END ### API Hosted File Methods ### lookup_file_hash() ###
# ### BEGIN ### API Hosted File Route ### get_file_object_hash() ###
# ### BEGIN ### API Hosted File Methods ### get_file_object_hash() ###
@logger_reset
async def get_file_object_hash(file_object:File):
#log.setLevel(logging.WARNING) # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL
@@ -137,25 +137,25 @@ async def get_file_object_hash(file_object:File):
log.debug(f'Elapsed time: {elapsed_time}')
return file_hash
# ### END ### API Hosted File Route ### get_file_object_hash() ###
# ### END ### API Hosted File Methods ### get_file_object_hash() ###
# ### BEGIN ### API Hosted File Route ### guess_file_extension() ###
# ### BEGIN ### API Hosted File Methods ### guess_file_extension() ###
def guess_file_extension(filename: str) -> str:
    """Return the lower-cased text after the last '.' in *filename*.

    Returns an empty string when *filename* contains no '.' at all, instead
    of raising IndexError.
    """
    # rpartition() yields an empty separator when there is no '.' to split on.
    _, dot, extension = filename.rpartition('.')
    return extension.lower() if dot else ''
# ### END ### API Hosted File Route ### guess_file_extension() ###
# ### END ### API Hosted File Methods ### guess_file_extension() ###
# ### BEGIN ### API Hosted File Route ### allowed_file_extension() ###
# ### BEGIN ### API Hosted File Methods ### allowed_file_extension() ###
def allowed_file_extension(extension: str, extension_list: list) -> bool:
    """Report whether *extension* appears in *extension_list*, ignoring case.

    Both the candidate and the allow-list entries are compared lower-cased,
    so an allow-list written as ['JPG'] still matches 'jpg'. A ``None``
    extension or ``None`` allow-list yields False instead of raising.
    """
    if extension is None or extension_list is None:
        return False
    wanted = extension.lower()
    # Non-string entries can never match a string extension; skip them.
    return any(isinstance(item, str) and item.lower() == wanted for item in extension_list)
# ### END ### API Hosted File Route ### allowed_file_extension() ###
# ### END ### API Hosted File Methods ### allowed_file_extension() ###
# ### BEGIN ### API Hosted File Route ### save_file() ###
# ### BEGIN ### API Hosted File Methods ### save_file() ###
# Updated 2022-08-09
@logger_reset
async def save_file(
@@ -167,7 +167,7 @@ async def save_file(
link_to_id_random: str = None,
check_allowed_extension: bool = False,
):
log.setLevel(logging.INFO) # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL
log.setLevel(logging.DEBUG) # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL
log.debug(locals())
hosted_files_path = settings.FILES_PATH['hosted_files_root']
@@ -371,7 +371,134 @@ async def save_file(
log.debug(shutil.disk_usage(hosted_files_path))
return file_info
# ### END ### API Hosted File Route ### save_file() ###
# ### END ### API Hosted File Methods ### save_file() ###
# ### BEGIN ### API Hosted File Methods ### save_file_to_hosted_file() ###
# Updated 2022-08-09
def _copy_fileobj_into_place(f_src, dest_path: str, buffer_size: int) -> float:
    """Copy *f_src* to *dest_path* through a '.part' sibling file; return the CPU seconds spent copying."""
    partial_path = f'{dest_path}.part'
    try:
        with open(partial_path, 'wb') as f_dest:
            timer_start = time.process_time()
            shutil.copyfileobj(f_src, f_dest, buffer_size)
            elapsed_time = time.process_time() - timer_start
        # Rename only once the data is fully written and the handle is closed,
        # so dest_path never exists in a half-written state.
        os.replace(partial_path, dest_path)
    except Exception:
        try:
            os.remove(partial_path)
        except OSError:
            pass  # Best effort: the partial file was never created or cannot be removed.
        raise
    return elapsed_time


@logger_reset
async def save_file_to_hosted_file(
        file_path: str,
        filename: str,
        extension: str,
        account_id: int,
        link_to_type: str,
        link_to_id: int,
    ):
    """Copy a local file into the hash-addressed hosted files directory.

    The file at *file_path* is hashed with ``get_file_object_hash()`` and
    stored as ``<hosted_files_root>/<first two hash chars>/<hash>.file``.
    If that destination already exists, nothing is copied.

    Args:
        file_path: Path of the local file to store.
        filename: Recorded in the returned dict under 'filename'.
        extension: Recorded in the returned dict under 'extension'.
        account_id: Not used by this function beyond debug logging.
        link_to_type: Recorded in the returned dict under 'link_to_type'.
        link_to_id: Recorded in the returned dict under 'link_to_id'.

    Returns:
        A dict with the keys 'saved', 'link_to_type', 'link_to_id',
        'filename', 'extension', 'size', 'hash_sha256', 'subdirectory_path',
        'already_exists', 'already_exists_subdir' and 'copy_timer'.
        ``False`` when copying the file to the destination raised.
    """
    log.setLevel(logging.DEBUG)  # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL
    log.debug(locals())

    hosted_files_path = settings.FILES_PATH['hosted_files_root']
    log.info(f'Hosted Files Path: {hosted_files_path}')
    log.debug(shutil.disk_usage(hosted_files_path))

    log.debug(file_path)
    log.debug(f'Filename: {filename} Extension: {extension}')

    file_info: dict = {
        'saved': None,
        'link_to_type': link_to_type,
        'link_to_id': link_to_id,
        'filename': filename,
        'extension': extension,
    }

    # 16384 bytes is the default
    # 4096 8192 16384 32768 65536 131072 262144 524288 1048576 bytes
    buffer_size = 524288

    # The context manager closes the source handle on every exit path.
    with open(file_path, 'rb') as file_obj:
        file_obj.seek(0, os.SEEK_END)
        file_size = file_obj.tell()
        file_obj.seek(0)  # The file will not properly save if seek is not reset to 0.
        log.debug(file_size)
        file_info['size'] = file_size

        file_hash = await get_file_object_hash(file_obj)
        log.debug(file_hash)
        file_info['hash_sha256'] = file_hash

        # Files are sharded into sub-directories named after the first two hash characters.
        file_hash_subdirectory = file_hash[0:2]
        subdirectory_dest = os.path.join(hosted_files_path, file_hash_subdirectory)
        log.debug(subdirectory_dest)
        os.makedirs(subdirectory_dest, exist_ok=True)
        file_info['subdirectory_path'] = file_hash_subdirectory

        file_dest_w_subdir = os.path.join(subdirectory_dest, f'{file_hash}.file')
        log.debug(file_dest_w_subdir)

        if os.path.exists(file_dest_w_subdir):
            log.warning('This file already exists at the destination with the subdirectory. Not re-saving.')
            file_info['already_exists'] = True
            file_info['already_exists_subdir'] = True
            file_info['copy_timer'] = 0
            file_info['saved'] = True
        else:
            log.warning('This file does not already exist at the destination subdirectory.')
            file_info['already_exists'] = False
            file_info['already_exists_subdir'] = False

            try:
                log.info('Saving file to destination...')
                # Hashing may have left the read position at the end of the
                # file; rewind so the whole file is copied.
                file_obj.seek(0)
                elapsed_time = _copy_fileobj_into_place(file_obj, file_dest_w_subdir, buffer_size)
            except Exception:
                # log.exception() already records the exception and traceback.
                log.exception('*** An exception happened. ***')
                file_info['copy_timer'] = 0
                file_info['saved'] = False
                return False

            log.debug(f'Elapsed time: {elapsed_time}')
            file_info['copy_timer'] = elapsed_time
            file_info['saved'] = True
            log.info(f'File saved to: {hosted_files_path}')

    log.info(f'Disk usage: {shutil.disk_usage(hosted_files_path)}')
    log.info(f"Filename: {file_info['filename']}")
    log.info(f"Subdirectory Path: {file_info['subdirectory_path']}")

    log.setLevel(logging.INFO)  # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL
    log.debug(file_info)
    log.debug(shutil.disk_usage(hosted_files_path))
    return file_info
# ### END ### API Hosted File Methods ### save_file_to_hosted_file() ###
# ### BEGIN ### API Hosted File Methods ### create_hosted_file_link() ###