import { ae_util } from '$lib/ae_utils/ae_utils';
import type { key_val } from '$lib/stores/ae_stores';

/**
 * A journal entry as produced by the import parsers in this module.
 */
export interface AeJournalEntryInput {
	name: string;
	content: string;
	tags: string[];
	created_on?: string; // ISO string
	updated_on?: string; // ISO string
	import_id?: string;
	external_id?: string;
	type_code?: string;
	original_filename: string;
}

/** Matches a personal-log date header: `## YYYY-MM-DD` plus an optional trailing title. */
const DATE_HEADER_RE = /^##\s+(\d{4}-\d{2}-\d{2})(.*)$/;

/** Matches a level-2 markdown header (`## Product Name`); `###` headers do not match. */
const PRODUCT_HEADER_RE = /^##\s+(.+)$/;

/** Matches one item of a YAML block list (`  - value`). */
const YAML_LIST_ITEM_RE = /^\s*-\s+(.*)$/;

/**
 * Splits text into lines, accepting both LF and CRLF line endings.
 * Without this, a trailing `\r` stays on every line of a CRLF file and the
 * header regexes above (whose `.` never matches `\r`) silently stop matching.
 */
function split_lines(text: string): string[] {
	return text.split(/\r?\n/);
}

/** Removes a trailing `.md` and then a trailing `.txt` (case-insensitive) from a file name. */
function strip_note_extension(filename: string): string {
	return filename.replace(/\.md$/i, '').replace(/\.txt$/i, '');
}

/** Removes one leading and one trailing quote character (`'` or `"`), if present. */
function strip_quotes(value: string): string {
	return value.replace(/^['"]|['"]$/g, '');
}

/**
 * Very small YAML-frontmatter reader: understands `title:` and `tags:` only.
 * Tags may be inline (`[a, b]` or `a, b`) or a block list (`- a` lines after
 * an empty `tags:`). Empty tags are dropped and surrounding quotes removed.
 * If `title:` appears more than once, the last one wins.
 */
function parse_frontmatter(frontmatter: string[]): { title: string; tags: string[] } {
	const tags: string[] = [];
	let title = '';
	let inTagList = false;

	const addTag = (raw: string): void => {
		const tag = strip_quotes(raw.trim()).trim();
		if (tag) tags.push(tag);
	};

	for (const line of frontmatter) {
		if (inTagList) {
			const item = YAML_LIST_ITEM_RE.exec(line);
			if (item) {
				addTag(item[1] ?? '');
				continue;
			}
			// First non-item line ends the block list; fall through and parse it normally.
			inTagList = false;
		}

		if (line.startsWith('title:')) {
			title = strip_quotes(line.substring(6).trim());
		} else if (line.startsWith('tags:')) {
			const tagPart = line.substring(5).trim();
			if (tagPart === '') {
				inTagList = true;
				continue;
			}
			const inline =
				tagPart.startsWith('[') && tagPart.endsWith(']') ? tagPart.slice(1, -1) : tagPart;
			inline.split(',').forEach(addTag);
		}
	}

	return { title, tags };
}

/**
 * Standard Parser
 * - Treats the whole file as one entry.
 * - If the file opens with `---` frontmatter that is properly closed, `title:` and
 *   `tags:` are read from it and the content is everything after the closing `---`.
 * - Otherwise, a first line starting with `# `, or any non-empty first line shorter
 *   than 60 characters, becomes the name and the rest becomes the content.
 * - Otherwise the name is the file name without `.md`/`.txt` and the content is the
 *   text unchanged.
 *
 * @param file Source file; only `name` and `lastModified` are read.
 * @param text Full text of the file.
 * @returns A single-element array with the parsed entry (`type_code: 'note'`).
 */
export async function parse_standard_note(
	file: File,
	text: string
): Promise<AeJournalEntryInput[]> {
	const lines = split_lines(text);
	const firstLine = lines[0] ?? '';
	const firstTrimmed = firstLine.trim();
	const restOfFile = (): string => lines.slice(1).join('\n').trim();

	const tags: string[] = [];
	let name = strip_note_extension(file.name);
	let content = text;

	if (firstTrimmed === '---') {
		// Basic frontmatter (YAML style). The closing fence is matched after trimming,
		// the same way the opening fence is, so trailing whitespace does not hide it.
		const closing = lines.findIndex((line, index) => index > 0 && line.trim() === '---');
		if (closing > -1) {
			const meta = parse_frontmatter(lines.slice(1, closing));
			// An empty title must not wipe out the filename-derived name.
			if (meta.title) name = meta.title;
			tags.push(...meta.tags);
			content = lines
				.slice(closing + 1)
				.join('\n')
				.trim();
		}
	} else if (firstLine.startsWith('# ')) {
		// Heuristic: a first-line markdown header is the entry name.
		const title = firstLine.substring(2).trim();
		if (title) name = title;
		content = restOfFile();
	} else if (firstTrimmed.length > 0 && firstTrimmed.length < 60) {
		// Heuristic: a short first line is treated as the title.
		name = firstTrimmed;
		content = restOfFile();
	}

	// The File object does not expose a creation time, so the modification
	// time is used for both timestamps.
	const lastModified = new Date(file.lastModified).toISOString();

	return [
		{
			name,
			content,
			tags,
			updated_on: lastModified,
			created_on: lastModified,
			original_filename: file.name,
			type_code: 'note'
		}
	];
}

/**
 * Personal Log Parser
 * - Splits the file into one entry per `## YYYY-MM-DD [optional title]` header.
 * - Each entry's content starts with its reconstructed header line.
 * - Text before the first date header is ignored.
 * - A file without any date header is delegated to `parse_standard_note`.
 *
 * `created_on` is `<date>T12:00:00` (noon, no timezone offset); `updated_on` is the
 * file's modification time.
 *
 * @param file Source file; only `name` and `lastModified` are read.
 * @param text Full text of the file.
 * @returns One entry per date header (`type_code: 'log'`, tags `['log']`).
 */
export async function parse_personal_log(
	file: File,
	text: string
): Promise<AeJournalEntryInput[]> {
	const lines = split_lines(text);

	// No date headers at all: the whole file is a single standard note.
	if (!lines.some((line) => DATE_HEADER_RE.test(line))) {
		return parse_standard_note(file, text);
	}

	const fileBaseName = strip_note_extension(file.name);
	const updatedOn = new Date(file.lastModified).toISOString();

	const entries: AeJournalEntryInput[] = [];
	let current: AeJournalEntryInput | null = null;
	let body: string[] = [];

	for (const line of lines) {
		const match = DATE_HEADER_RE.exec(line);
		if (match) {
			// A new header closes the entry that was being collected.
			if (current) {
				current.content = body.join('\n').trim();
				entries.push(current);
			}

			const dateStr = match[1] ?? '';
			const extraTitle = (match[2] ?? '').trim();
			current = {
				name: extraTitle ? `${dateStr} - ${extraTitle}` : `${fileBaseName} - ${dateStr}`,
				content: '',
				created_on: `${dateStr}T12:00:00`, // Noon on that day
				updated_on: updatedOn,
				tags: ['log'],
				type_code: 'log',
				original_filename: file.name
			};
			// Keep the header in the content for context (no trailing space when untitled).
			body = [extraTitle ? `## ${dateStr} ${extraTitle}` : `## ${dateStr}`];
		} else if (current) {
			body.push(line);
		}
		// Lines before the first date header (preamble) are intentionally ignored.
	}

	if (current) {
		current.content = body.join('\n').trim();
		entries.push(current);
	}

	return entries;
}

/**
 * Amazon Vine Review Parser
 * - Splits the file by `## Product Name` headers, one entry per product.
 * - Within each product section, the first `* http...` bullet is taken as the product
 *   URL and the first `### ...` header as the review title.
 * - Text before the first product header is ignored; a file without any product
 *   header yields no entries.
 *
 * @param file Source file; only `name` and `lastModified` are read.
 * @param text Full text of the file.
 * @returns One entry per product section (`type_code: 'review'`).
 */
export async function parse_amazon_vine(
	file: File,
	text: string
): Promise<AeJournalEntryInput[]> {
	const entries: AeJournalEntryInput[] = [];
	let productName: string | null = null;
	let body: string[] = [];

	for (const line of split_lines(text)) {
		const match = PRODUCT_HEADER_RE.exec(line);
		if (match) {
			if (productName !== null) {
				entries.push(format_vine_entry({ productName }, body, file));
			}
			productName = (match[1] ?? '').trim();
			body = [];
		} else if (productName !== null) {
			body.push(line);
		}
	}

	if (productName !== null) {
		entries.push(format_vine_entry({ productName }, body, file));
	}

	return entries;
}

/**
 * Builds the journal entry for one product section of a Vine review file.
 *
 * The resulting content is, in order and separated by blank lines: `# <review title>`
 * (if found), the remaining body text, and `**Product Link:** <url>` (if found).
 * Missing parts are omitted without leaving stray blank lines.
 *
 * @param entry Carries the product name, which becomes the entry name.
 * @param bodyLines Lines of the section, excluding the `## ` product header.
 * @param file Source file; only `name` and `lastModified` are read.
 */
function format_vine_entry(
	entry: { productName: string },
	bodyLines: string[],
	file: File
): AeJournalEntryInput {
	let url = '';
	let reviewTitle = '';
	const cleanBody: string[] = [];

	for (const line of bodyLines) {
		const trimmed = line.trim();

		// Only the first URL bullet / first level-3 header are extracted;
		// later ones stay in the body.
		if (!url && trimmed.startsWith('* http')) {
			url = trimmed.replace(/^\*\s+/, '').split(' ')[0] ?? '';
			continue;
		}
		if (!reviewTitle && trimmed.startsWith('### ')) {
			reviewTitle = trimmed.substring(4).trim();
			continue;
		}
		cleanBody.push(line);
	}

	const content = [
		reviewTitle ? `# ${reviewTitle}` : '',
		cleanBody.join('\n').trim(),
		url ? `**Product Link:** ${url}` : ''
	]
		.filter((part) => part.length > 0)
		.join('\n\n');

	const lastModified = new Date(file.lastModified).toISOString();

	return {
		name: entry.productName,
		content,
		tags: ['amazon', 'vine', 'review'],
		created_on: lastModified,
		updated_on: lastModified,
		original_filename: file.name,
		type_code: 'review'
	};
}
/**
 * Registry of the available import parsers, keyed by parser id.
 * Every parser takes the source `File` and its text content.
 */
export const PARSERS = {
	standard: parse_standard_note,
	personal_log: parse_personal_log,
	amazon_vine: parse_amazon_vine
};