feat(journals): implement bulk Markdown import with multiple parsing strategies

- Added 'ae_journals_parsers.ts' with Standard, Personal Log, and Amazon Vine parsers (ported from Python)
- Created 'AeCompModalJournalImport' for file selection, preview, and API submission
- Integrated Import button into Journals list view
2026-01-13 23:22:46 -05:00
parent 8fd11d7224
commit d691fa8cb3
3 changed files with 443 additions and 1 deletions
--- a/src/lib/ae_journals/ae_journals_parsers.ts
+++ b/src/lib/ae_journals/ae_journals_parsers.ts
@@ -0,0 +1,214 @@
+import { ae_util } from '$lib/ae_utils/ae_utils';
+import type { key_val } from '$lib/stores/ae_stores';
+
+export interface AeJournalEntryInput { // Shape of one parsed journal entry; every parser in this file returns an array of these.
+    name: string; // entry title
+    content: string; // entry body text
+    tags: string[];
+    created_on?: string; // ISO string
+    updated_on?: string; // ISO string
+    import_id?: string; // not set by the parsers in this file
+    external_id?: string; // not set by the parsers in this file
+    type_code?: string; // parsers in this file emit 'note', 'log', or 'review'
+    original_filename: string; // parsers in this file set this to file.name
+}
+
+/**
+ * Standard Parser
+ * - Treats the whole file as one entry.
+ * - First line is title (if it looks like a title).
+ * - Rest is content.
+ *
+ * Title selection, in order: a leading `# ` heading; otherwise a non-empty
+ * first line shorter than 60 characters (after trimming) that is not `---`;
+ * otherwise the file name with a trailing `.md` / `.txt` removed (case-insensitive).
+ *
+ * If the first line is `---` and a later line is exactly `---`, the lines in
+ * between are read as frontmatter: `title:` overrides the name, `tags:` fills
+ * the tag list, and content becomes everything after the closing line.
+ *
+ * @param file Source file; supplies the fallback name, `original_filename`, and both timestamps (`lastModified`).
+ * @param text Full text of the file.
+ * @returns A single-element array; the entry has `type_code` 'note'.
+ */
+export async function parse_standard_note(file: File, text: string): Promise<AeJournalEntryInput[]> {
+    const lines = text.split('\n'); // NOTE(review): splitting on '\n' only leaves a trailing '\r' on every line of CRLF input
+    let name = file.name.replace(/\.md$/i, '').replace(/\.txt$/i, '');
+    let content = text;
+    const tags: string[] = [];
+
+    // Heuristic: If first line is a header, use it as name
+    if (lines.length > 0 && lines[0].startsWith('# ')) {
+        name = lines[0].substring(2).trim();
+        content = lines.slice(1).join('\n').trim();
+    } else if (lines.length > 0 && lines[0].trim().length > 0 && lines[0].trim().length < 60) {
+        // First line is short, treat as title if it doesn't look like frontmatter
+        if (lines[0].trim() !== '---') {
+             name = lines[0].trim();
+             content = lines.slice(1).join('\n').trim();
+        }
+    }
+
+    // Basic Frontmatter check (YAML style)
+    if (lines[0]?.trim() === '---') {
+        const endFrontmatter = lines.indexOf('---', 1); // NOTE(review): exact match — a closing line with trailing whitespace or '\r' is not found, so the frontmatter is left in content
+        if (endFrontmatter > -1) {
+            const frontmatter = lines.slice(1, endFrontmatter);
+            content = lines.slice(endFrontmatter + 1).join('\n').trim();
+            
+            // Extract tags or title from frontmatter (very basic parsing)
+            frontmatter.forEach(line => {
+                if (line.startsWith('title:')) name = line.substring(6).trim().replace(/^['"]|['"]$/g, ''); // strips one leading and one trailing quote character
+                if (line.startsWith('tags:')) {
+                    // This is brittle, assumes inline tags like [a, b] or comma separated
+                    const tagPart = line.substring(5).trim();
+                    if (tagPart.startsWith('[') && tagPart.endsWith(']')) {
+                        tagPart.substring(1, tagPart.length - 1).split(',').forEach(t => tags.push(t.trim()));
+                    } else {
+                        tagPart.split(',').forEach(t => tags.push(t.trim())); // NOTE(review): an empty `tags:` value still pushes one '' tag; empty items are not filtered
+                    }
+                }
+            });
+        }
+    }
+
+    const lastModified = new Date(file.lastModified).toISOString();
+
+    return [{
+        name,
+        content,
+        tags,
+        updated_on: lastModified,
+        created_on: lastModified, // We don't really know creation time from File object usually
+        original_filename: file.name,
+        type_code: 'note'
+    }];
+}
+
+/**
+ * Personal Log Parser
+ * - Splits file by dates: `## YYYY-MM-DD`
+ * - Text after the date on the header line becomes part of the entry name;
+ *   without it the name is `<file base name> - <date>`.
+ * - Each entry's content starts with the rebuilt header line, followed by the
+ *   lines up to the next date header.
+ * - Lines before the first date header are dropped.
+ * - If no line matches a date header, the file is handed to parse_standard_note.
+ *
+ * @param file Source file; supplies the base name, `original_filename`, and `updated_on` (`lastModified`).
+ * @param text Full text of the file.
+ * @returns One entry per date header, with tags ['log'] and `type_code` 'log'
+ *          (or the parse_standard_note result when there are no date headers).
+ */
+export async function parse_personal_log(file: File, text: string): Promise<AeJournalEntryInput[]> {
+    const entries: AeJournalEntryInput[] = [];
+    const dateRegex = /^##\s+(\d{4}-\d{2}-\d{2})(.*)$/; // group 1: the date, group 2: rest of the header line
+    
+    const lines = text.split('\n'); // NOTE(review): on CRLF input each line keeps its '\r', which `.` cannot match, so dateRegex never matches and the file falls through to parse_standard_note — confirm inputs are LF-only
+    let currentEntry: Partial<AeJournalEntryInput> | null = null;
+    let currentContent: string[] = [];
+    
+    // Check if the whole file is just one entry (no date headers)
+    if (!lines.some(l => dateRegex.test(l))) {
+        return parse_standard_note(file, text);
+    }
+
+    const fileBaseName = file.name.replace(/\.md$/i, '').replace(/\.txt$/i, '');
+
+    for (const line of lines) {
+        const match = line.match(dateRegex);
+        if (match) {
+            // Save previous entry
+            if (currentEntry) {
+                currentEntry.content = currentContent.join('\n').trim();
+                entries.push(currentEntry as AeJournalEntryInput); // cast is safe here: name, tags, original_filename and content have all been assigned
+            }
+
+            // Start new entry
+            const dateStr = match[1];
+            const extraTitle = match[2].trim();
+            
+            currentEntry = {
+                name: extraTitle ? `${dateStr} - ${extraTitle}` : `${fileBaseName} - ${dateStr}`,
+                created_on: `${dateStr}T12:00:00`, // Noon on that day; NOTE(review): no timezone designator, unlike the toISOString() value used for updated_on
+                updated_on: new Date(file.lastModified).toISOString(),
+                tags: ['log'],
+                type_code: 'log',
+                original_filename: file.name,
+                // Reconstruct the header as part of content? Or just skip it?
+                // Python parser added it back: `## {date_str}\n\n{body}`
+                // Let's add it back for context.
+            };
+            currentContent = [`## ${dateStr} ${extraTitle}`]; // NOTE(review): leaves a trailing space on the header line when extraTitle is empty and body lines follow
+        } else {
+            if (currentEntry) {
+                currentContent.push(line);
+            } else {
+                // Preamble before first date header? Ignore or treat as separate?
+                // Ignoring for now or could be a "Header" entry.
+            }
+        }
+    }
+
+    // Push last entry
+    if (currentEntry) {
+        currentEntry.content = currentContent.join('\n').trim();
+        entries.push(currentEntry as AeJournalEntryInput);
+    }
+
+    return entries;
+}
+
+/**
+ * Amazon Vine Review Parser (from Python logic)
+ * - Splits by `## Product Name`
+ * - Looks for URL and `### Review Title`
+ * - Lines before the first `## ` header are ignored.
+ * - Returns an empty array when no line matches a `## ` header.
+ *
+ * @param file Source file, passed through to format_vine_entry (timestamps and `original_filename`).
+ * @param text Full text of the file.
+ * @returns One entry per `## ` section, each built by format_vine_entry.
+ */
+export async function parse_amazon_vine(file: File, text: string): Promise<AeJournalEntryInput[]> {
+    // Split by `\n## ` but we need to keep the delimiter or reconstruct
+    // JS split doesn't keep delimiter nicely unless captured.
+    // Let's iterate lines.
+    const entries: AeJournalEntryInput[] = [];
+    const productHeaderRegex = /^##\s+(.+)$/; // requires whitespace right after '##', so '### ...' lines do not match
+    
+    const lines = text.split('\n'); // NOTE(review): on CRLF input each line keeps its '\r', which `.` cannot match, so no header is recognised and the result is [] — confirm inputs are LF-only
+    let currentEntry: any = null; // NOTE(review): only ever holds { productName: string } or null; could be typed accordingly
+    let currentBody: string[] = [];
+    
+    for (const line of lines) {
+        const match = line.match(productHeaderRegex);
+        if (match) {
+             if (currentEntry) {
+                 entries.push(format_vine_entry(currentEntry, currentBody, file)); // flush the section that just ended
+             }
+             currentEntry = { productName: match[1].trim() };
+             currentBody = [];
+        } else {
+            if (currentEntry) {
+                currentBody.push(line);
+            }
+        }
+    }
+    if (currentEntry) {
+        entries.push(format_vine_entry(currentEntry, currentBody, file)); // flush the final section
+    }
+    
+    return entries;
+}
+
+function format_vine_entry(entry: any, bodyLines: string[], file: File): AeJournalEntryInput { // Builds one review entry from a product section: entry.productName is the name, bodyLines are the lines under the '## ' header.
+    let url = '';
+    let reviewTitle = '';
+    const cleanBody: string[] = []; // body lines minus the extracted URL and title lines
+    
+    for (const line of bodyLines) {
+        const trimmed = line.trim();
+        if (!url && trimmed.startsWith('* http')) { // only the first such bullet is taken as the product URL
+            url = trimmed.replace(/^\*\s+/, '').split(' ')[0]; // drop the bullet marker, keep text up to the first space
+            continue;
+        }
+        if (!reviewTitle && trimmed.startsWith('### ')) { // only the first '### ' line is taken as the review title
+            reviewTitle = trimmed.substring(4).trim();
+            continue;
+        }
+        cleanBody.push(line); // later '* http' / '### ' lines stay in the body
+    }
+    
+    let content = '';
+    if (reviewTitle) content += `# ${reviewTitle}\n\n`; // review title becomes a top-level heading
+    content += cleanBody.join('\n').trim();
+    if (url) content += `\n\n**Product Link:** ${url}`; // URL is re-attached as a trailing link line
+    
+    return {
+        name: entry.productName,
+        content: content,
+        tags: ['amazon', 'vine', 'review'],
+        created_on: new Date(file.lastModified).toISOString(), // both timestamps come from the file's lastModified
+        updated_on: new Date(file.lastModified).toISOString(),
+        original_filename: file.name,
+        type_code: 'review'
+    };
+}
+
+export const PARSERS = { // Lookup table from parser key to parser function; all three share the (file, text) => Promise<AeJournalEntryInput[]> signature.
+    standard: parse_standard_note,
+    personal_log: parse_personal_log, // falls back to parse_standard_note when the text has no date headers
+    amazon_vine: parse_amazon_vine
+};