// OSIT-AE-App-Svelte/src/lib/ae_journals/ae_journals_parsers.ts

import { ae_util } from '$lib/ae_utils/ae_utils';
import type { key_val } from '$lib/stores/ae_stores';

/**
 * Normalized journal entry as produced by the import parsers in this module.
 * Each parser returns an array of these, one per entry found in the source file.
 */
export interface AeJournalEntryInput {
    /** Entry title (derived from a heading, frontmatter, date header, product name or file name). */
    name: string;
    /** Entry body text (markdown/plain text). */
    content: string;
    /** Tags attached to the entry; parsers here use frontmatter tags or fixed per-parser tags. */
    tags: string[];
    /** Creation timestamp. Parsers here fill it from a date header or the file's last-modified time. */
    created_on?: string; // ISO string
    /** Last-update timestamp. Parsers here fill it from the file's last-modified time. */
    updated_on?: string; // ISO string
    /** Not set by any parser visible in this file; presumably assigned by the import pipeline — TODO confirm. */
    import_id?: string;
    /** Not set by any parser visible in this file; presumably an identifier from the source system — TODO confirm. */
    external_id?: string;
    /** Entry kind. Parsers in this file emit 'note', 'log' or 'review'. */
    type_code?: string;
    /** Name of the file the entry was parsed from (`File.name`). */
    original_filename: string;
}

/**
 * Standard Parser
 * - Treats the whole file as exactly one entry (the returned array always has one element).
 * - Title resolution:
 *   1. If the file starts with a YAML-style frontmatter block (`---` ... `---`),
 *      a non-empty `title:` value from it is used and the block is removed from the content.
 *   2. Otherwise a leading `# ` heading is used as the title.
 *   3. Otherwise a short (< 60 chars), non-empty first line is used as the title.
 *   4. Fallback: the file name without its `.md` / `.txt` extension.
 * - Frontmatter parsing is deliberately basic: only `title:` and `tags:` are read.
 *   Tags may be inline (`tags: [a, b]` or `tags: a, b`) or a block list
 *   (`tags:` followed by `- item` lines). Empty tags are dropped and surrounding
 *   quotes are stripped.
 * - Delimiters are matched after trimming, so files with CRLF line endings or
 *   trailing spaces after `---` are handled.
 *
 * @param file Source file; only `name` and `lastModified` are read.
 * @param text Full decoded text of the file.
 * @returns A single-element array holding the parsed note (`type_code: 'note'`).
 */
export async function parse_standard_note(
    file: File,
    text: string
): Promise<AeJournalEntryInput[]> {
    // Trims and removes one leading and/or trailing quote character.
    const stripQuotes = (value: string): string =>
        value.trim().replace(/^['"]|['"]$/g, '');

    const lines = text.split('\n');
    const firstLine = lines[0] ?? '';
    const firstTrimmed = firstLine.trim();

    let name = file.name.replace(/\.md$/i, '').replace(/\.txt$/i, '');
    let content = text;
    const tags: string[] = [];

    if (firstTrimmed === '---') {
        // Basic frontmatter check (YAML style). Compare trimmed lines so that a
        // trailing '\r' (CRLF files) or stray spaces do not hide the closing delimiter.
        const closing = lines.findIndex(
            (line, idx) => idx > 0 && line.trim() === '---'
        );
        if (closing > -1) {
            const frontmatter = lines.slice(1, closing);
            content = lines
                .slice(closing + 1)
                .join('\n')
                .trim();

            for (let i = 0; i < frontmatter.length; i++) {
                const line = frontmatter[i] ?? '';

                if (line.startsWith('title:')) {
                    const title = stripQuotes(line.substring(6));
                    // Keep the file-name fallback when the title is blank.
                    if (title) name = title;
                }

                if (line.startsWith('tags:')) {
                    const tagPart = line.substring(5).trim();
                    let rawTags: string[];

                    if (tagPart === '') {
                        // Block list: consume the directly following `- item` lines.
                        rawTags = [];
                        for (let j = i + 1; j < frontmatter.length; j++) {
                            const item = /^-\s+(.+)$/.exec(
                                (frontmatter[j] ?? '').trim()
                            );
                            if (!item) break;
                            rawTags.push(item[1] ?? '');
                        }
                    } else if (
                        tagPart.startsWith('[') &&
                        tagPart.endsWith(']')
                    ) {
                        rawTags = tagPart.slice(1, -1).split(',');
                    } else {
                        rawTags = tagPart.split(',');
                    }

                    for (const raw of rawTags) {
                        const tag = stripQuotes(raw);
                        // Skip blanks produced by `tags:`, `tags: []` or trailing commas.
                        if (tag) tags.push(tag);
                    }
                }
            }
        }
    } else if (firstLine.startsWith('# ')) {
        // Heuristic: a leading markdown H1 is the title; the rest is the body.
        const heading = firstLine.substring(2).trim();
        if (heading) name = heading;
        content = lines.slice(1).join('\n').trim();
    } else if (firstTrimmed.length > 0 && firstTrimmed.length < 60) {
        // A short first line is treated as the title.
        name = firstTrimmed;
        content = lines.slice(1).join('\n').trim();
    }

    // The File object does not expose a creation time, so the last-modified
    // time is used for both timestamps.
    const lastModified = new Date(file.lastModified).toISOString();

    return [
        {
            name,
            content,
            tags,
            updated_on: lastModified,
            created_on: lastModified,
            original_filename: file.name,
            type_code: 'note'
        }
    ];
}

/**
 * Personal Log Parser
 * - Splits the file into one entry per date header: `## YYYY-MM-DD [optional title]`.
 * - Accepts both LF and CRLF line endings.
 * - If the file contains no date header at all, it is delegated to
 *   `parse_standard_note` and treated as a single note.
 * - Any text before the first date header is discarded.
 * - Each entry's content starts with the reconstructed header line
 *   (`## <date>` or `## <date> <title>`) followed by the lines up to the next header.
 *
 * @param file Source file; only `name` and `lastModified` are read.
 * @param text Full decoded text of the file.
 * @returns One entry per date header, tagged `log` with `type_code: 'log'`.
 *          `created_on` is `<date>T12:00:00` (no timezone offset);
 *          `updated_on` is the file's last-modified time.
 */
export async function parse_personal_log(
    file: File,
    text: string
): Promise<AeJournalEntryInput[]> {
    const dateRegex = /^##\s+(\d{4}-\d{2}-\d{2})(.*)$/;

    // Split on LF or CRLF: a trailing '\r' would otherwise stop `(.*)$` from
    // matching, so no header in a Windows-formatted file would be recognised.
    const lines = text.split(/\r?\n/);

    // No date headers: the whole file is just one entry.
    if (!lines.some((l) => dateRegex.test(l))) {
        return parse_standard_note(file, text);
    }

    const fileBaseName = file.name.replace(/\.md$/i, '').replace(/\.txt$/i, '');
    const updatedOn = new Date(file.lastModified).toISOString();

    const entries: AeJournalEntryInput[] = [];
    let currentEntry: AeJournalEntryInput | null = null;
    let currentContent: string[] = [];

    for (const line of lines) {
        const match = dateRegex.exec(line);

        if (!match) {
            // Lines before the first header (preamble) are ignored.
            if (currentEntry) currentContent.push(line);
            continue;
        }

        // Close the previous entry before starting a new one.
        if (currentEntry) {
            currentEntry.content = currentContent.join('\n').trim();
            entries.push(currentEntry);
        }

        const dateStr = match[1] ?? '';
        const extraTitle = (match[2] ?? '').trim();

        currentEntry = {
            name: extraTitle
                ? `${dateStr} - ${extraTitle}`
                : `${fileBaseName} - ${dateStr}`,
            content: '',
            created_on: `${dateStr}T12:00:00`, // Noon on that day
            updated_on: updatedOn,
            tags: ['log'],
            type_code: 'log',
            original_filename: file.name
        };

        // Keep the header as the first content line for context, without a
        // dangling space when there is no extra title.
        currentContent = [
            extraTitle ? `## ${dateStr} ${extraTitle}` : `## ${dateStr}`
        ];
    }

    // Push last entry
    if (currentEntry) {
        currentEntry.content = currentContent.join('\n').trim();
        entries.push(currentEntry);
    }

    return entries;
}

/**
 * Amazon Vine Review Parser (from Python logic)
 * - Splits the file into one entry per `## Product Name` header.
 * - Accepts both LF and CRLF line endings.
 * - Within each product section, looks for a `* http...` bullet (product URL)
 *   and a `### Review Title` line.
 * - Text before the first `## ` header is ignored; a file without any such
 *   header yields an empty array.
 *
 * @param file Source file; only `name` and `lastModified` are read.
 * @param text Full decoded text of the file.
 * @returns One review entry per product header.
 */
export async function parse_amazon_vine(
    file: File,
    text: string
): Promise<AeJournalEntryInput[]> {
    const productHeaderRegex = /^##\s+(.+)$/;
    const entries: AeJournalEntryInput[] = [];

    let currentEntry: { productName: string } | null = null;
    let currentBody: string[] = [];

    // Split on LF or CRLF: `.` does not match '\r', so with a plain '\n' split
    // no header line of a Windows-formatted file would match the regex.
    for (const line of text.split(/\r?\n/)) {
        const match = productHeaderRegex.exec(line);
        if (match) {
            // A new product starts: emit the one collected so far.
            if (currentEntry) {
                entries.push(
                    format_vine_entry(currentEntry, currentBody, file)
                );
            }
            currentEntry = { productName: (match[1] ?? '').trim() };
            currentBody = [];
        } else if (currentEntry) {
            currentBody.push(line);
        }
    }

    if (currentEntry) {
        entries.push(format_vine_entry(currentEntry, currentBody, file));
    }

    return entries;
}

/**
 * Builds a review entry from one product section.
 *
 * The first `* http...` bullet is taken as the product URL and the first
 * `### ` line as the review title; both lines are removed from the body.
 * The content is assembled as `# <title>`, body, `**Product Link:** <url>`,
 * separated by blank lines; missing parts are skipped so the content never
 * starts or ends with stray blank lines.
 *
 * @param entry Accumulated product data (only `productName` is read).
 * @param bodyLines Lines of the section following the product header.
 * @param file Source file; `name` and `lastModified` are read.
 */
function format_vine_entry(
    entry: { productName: string },
    bodyLines: string[],
    file: File
): AeJournalEntryInput {
    let url = '';
    let reviewTitle = '';
    const cleanBody: string[] = [];

    for (const line of bodyLines) {
        const trimmed = line.trim();
        if (!url && trimmed.startsWith('* http')) {
            // Keep only the URL itself, dropping any text after the first space.
            url = trimmed.replace(/^\*\s+/, '').split(' ')[0] ?? '';
            continue;
        }
        if (!reviewTitle && trimmed.startsWith('### ')) {
            reviewTitle = trimmed.substring(4).trim();
            continue;
        }
        cleanBody.push(line);
    }

    const content = [
        reviewTitle ? `# ${reviewTitle}` : '',
        cleanBody.join('\n').trim(),
        url ? `**Product Link:** ${url}` : ''
    ]
        .filter((section) => section.length > 0)
        .join('\n\n');

    const lastModified = new Date(file.lastModified).toISOString();

    return {
        name: entry.productName,
        content,
        tags: ['amazon', 'vine', 'review'],
        created_on: lastModified,
        updated_on: lastModified,
        original_filename: file.name,
        type_code: 'review'
    };
}

/**
 * Lookup table of the available import parsers, keyed by parser identifier.
 * Every parser takes `(file, text)` and resolves to an array of `AeJournalEntryInput`.
 */
export const PARSERS = {
    standard: parse_standard_note,
    personal_log: parse_personal_log,
    amazon_vine: parse_amazon_vine
};