diff --git a/app/routers/api_v3_actions_event_file.py b/app/routers/api_v3_actions_event_file.py
index 0aa168a..3359071 100644
--- a/app/routers/api_v3_actions_event_file.py
+++ b/app/routers/api_v3_actions_event_file.py
@@ -13,8 +13,8 @@ log = logging.getLogger(__name__)
 from app.config import settings
 from app.db_sql import redis_lookup_id_random, sql_select, sql_update, sql_delete, get_id_random
 from app.methods.hosted_file_methods import (
-    create_hosted_file_obj, load_hosted_file_obj, save_file,
-    create_hosted_file_link, delete_hosted_file_link
+    create_hosted_file_obj, load_hosted_file_obj, save_file,
+    create_hosted_file_link, delete_hosted_file_link, handle_delete_hosted_file
 )
 from app.methods.event_file_methods import create_event_file_obj, load_event_file_obj
 from app.lib_general_v3 import (
@@ -276,6 +276,69 @@ async def create_event_file_from_hosted_file_action(
     return mk_resp(data={"event_file_id": get_id_random(res_ef_id, 'event_file')})
 
 
+@router.delete('/{event_file_id}', response_model=Resp_Body_Base)
+async def delete_event_file_action(
+        event_file_id: str = Path(min_length=11, max_length=22),
+        rm_orphan: bool = Query(True),
+        account: AccountContext = Depends(get_account_context),
+        delay: DelayParams = Depends(),
+    ):
+    """
+    Delete an event_file record and clean up its hosted_file link.
+
+    Steps, in order:
+    1. Resolve event_file_id to the internal integer id (404 if unknown).
+    2. If the row references a hosted_file, call handle_delete_hosted_file for
+       the hosted_file_link (event_file → hosted_file); rm_orphan is passed
+       through and is intended to also remove the physical file and the
+       hosted_file DB record when no other links remain.
+    3. Delete the event_file row itself.
+
+    The steps are separate calls and are NOT wrapped in a transaction here:
+    the event_file row is deleted even when link cleanup reports failure
+    (a warning is logged and 'hosted_file_link_cleaned' is False).
+
+    NOTE(review): only the hosted_file cleanup receives account.account_id;
+    nothing in this handler checks that the event_file row belongs to the
+    calling account — confirm ownership is enforced elsewhere.
+
+    Returns a response whose data holds 'event_file_deleted' (always True on
+    this path) and 'hosted_file_link_cleaned'.
+    Raises HTTPException(404) when event_file_id cannot be resolved.
+    """
+    if delay.sleep_time_s > 0: await asyncio.sleep(delay.sleep_time_s)
+
+    ef_id_int = redis_lookup_id_random(record_id_random=event_file_id, table_name='event_file')
+    if not ef_id_int:
+        raise HTTPException(status_code=404, detail="Event file not found.")
+
+    ef_rec = sql_select(sql="SELECT hosted_file_id FROM event_file WHERE id = :id", data={'id': ef_id_int})
+    hf_id_int = ef_rec.get('hosted_file_id') if ef_rec else None
+
+    link_cleaned = False
+    if hf_id_int:
+        link_cleaned = handle_delete_hosted_file(
+            account_id=account.account_id,
+            hosted_file_id=hf_id_int,
+            link_to_type='event_file',
+            link_to_id=ef_id_int,
+            rm_orphan=rm_orphan,
+        )
+        if not link_cleaned:
+            # Best-effort: keep going so the event_file row is still removed.
+            log.warning(
+                "handle_delete_hosted_file returned False for hosted_file %s / event_file %s",
+                hf_id_int, ef_id_int,
+            )
+
+    sql_delete(table_name='event_file', record_id=ef_id_int)
+
+    return mk_resp(data={
+        'event_file_deleted': True,
+        'hosted_file_link_cleaned': bool(link_cleaned),
+    })
+
+
 @router.get('/{event_file_id}/download')
 async def download_event_file_action(
         response: Response,
diff --git a/app/routers/api_v3_actions_hosted_file.py b/app/routers/api_v3_actions_hosted_file.py
index 9d252d3..5cf11fc 100644
--- a/app/routers/api_v3_actions_hosted_file.py
+++ b/app/routers/api_v3_actions_hosted_file.py
@@ -370,6 +370,89 @@ async def check_hosted_file_obj_w_hash_action(
     return mk_resp(data=False, status_code=404, response=response, status_message="No record found for this hash.")
 
 
+@router.get('/orphan_scan', response_model=Resp_Body_Base)
+async def orphan_scan_action(
+        include_disk_orphans: bool = Query(False),
+        limit: int = Query(500, ge=1, le=5000),
+        offset: int = Query(0, ge=0),
+        account: AccountContext = Depends(get_account_context),
+    ):
+    """
+    Admin: find hosted_file records with no hosted_file_link entries (DB orphans),
+    and optionally physical files on disk with no hosted_file DB record (disk orphans).
+
+    limit/offset page through the DB orphans only, ordered by created_on with
+    id as a tie-breaker so pages stay stable; records with hide = 1 are skipped.
+    'db_orphan_count' is the size of the returned page, not the total backlog.
+    The disk scan is not paginated: it walks all of hosted_files_root and reports
+    every '<name>.file' whose <name> is not a hash_sha256 in hosted_file.
+
+    NOTE(review): 'account' is resolved via get_account_context but is not
+    referenced in the body; no admin check is visible here and the query is not
+    scoped to the account — confirm access is restricted elsewhere.
+    NOTE(review): 'orphan_scan' is exactly 11 characters, so it satisfies the
+    Path(min_length=11, max_length=22) constraint used for record ids; this
+    route must stay registered before any GET '/{hosted_file_id}' route.
+    """
+    db_orphan_sql = """
+        SELECT hf.id, hf.id_random, hf.filename, hf.hash_sha256,
+               hf.subdirectory_path, hf.size, hf.content_type, hf.created_on
+        FROM hosted_file hf
+        LEFT JOIN hosted_file_link hfl ON hfl.hosted_file_id = hf.id
+        WHERE hfl.id IS NULL
+          AND (hf.hide IS NULL OR hf.hide != 1)
+        ORDER BY hf.created_on ASC, hf.id ASC
+        LIMIT :limit OFFSET :offset
+    """
+    raw = sql_select(sql=db_orphan_sql, data={'limit': limit, 'offset': offset}, as_list=True) or []
+
+    db_orphans = []
+    for row in raw:
+        created_on = row.get('created_on')
+        db_orphans.append({
+            'hosted_file_id': row.get('id_random') or get_id_random(row['id'], 'hosted_file'),
+            'filename': row.get('filename'),
+            'hash_sha256': row.get('hash_sha256'),
+            'subdirectory_path': row.get('subdirectory_path'),
+            'size': row.get('size'),
+            'content_type': row.get('content_type'),
+            # A NULL created_on must serialize as '', not the string 'None'.
+            'created_on': '' if created_on is None else str(created_on),
+        })
+
+    result = {
+        'db_orphans': db_orphans,
+        'db_orphan_count': len(db_orphans),
+        'disk_orphans': [],
+        'disk_orphan_count': 0,
+    }
+
+    if include_disk_orphans:
+        hosted_files_root = settings.FILES_PATH['hosted_files_root']
+        all_db_hashes_raw = sql_select(sql="SELECT hash_sha256 FROM hosted_file", as_list=True) or []
+        all_db_hashes = {r['hash_sha256'] for r in all_db_hashes_raw}
+
+        # NOTE(review): os.walk is synchronous, so this async handler blocks
+        # the event loop for the duration of the scan.
+        suffix = '.file'
+        disk_orphans = []
+        for dirpath, _, filenames in os.walk(hosted_files_root):
+            for fname in filenames:
+                if not fname.endswith(suffix):
+                    continue
+                sha256 = fname[:-len(suffix)]
+                if sha256 not in all_db_hashes:
+                    disk_orphans.append({
+                        'hash_sha256': sha256,
+                        'path': os.path.join(dirpath, fname),
+                    })
+
+        result['disk_orphans'] = disk_orphans
+        result['disk_orphan_count'] = len(disk_orphans)
+
+    return mk_resp(data=result)
+
+
 @router.get('/{hosted_file_id}/links', response_model=Resp_Body_Base)
 async def get_file_links_action(
         hosted_file_id: str = Path(min_length=11, max_length=22),