feat(journals): implement bulk Markdown import with multiple parsing strategies

- Added 'ae_journals_parsers.ts' with Standard, Personal Log, and Amazon Vine parsers (ported from Python)
- Created 'AeCompModalJournalImport' for file selection, preview, and API submission
- Integrated Import button into Journals list view
This commit is contained in:
Scott Idem
2026-01-13 23:22:46 -05:00
parent 8fd11d7224
commit d691fa8cb3
3 changed files with 443 additions and 1 deletions

View File

@@ -0,0 +1,214 @@
import { ae_util } from '$lib/ae_utils/ae_utils';
import type { key_val } from '$lib/stores/ae_stores';
/**
 * A single journal entry as produced by the import parsers in this file,
 * ready to be handed to whatever submits the import.
 */
export interface AeJournalEntryInput {
  /** Entry title. */
  name: string;

  /** Entry body text. */
  content: string;

  /** Tags attached to the entry. */
  tags: string[];

  /** Creation timestamp (ISO string). */
  created_on?: string;

  /** Last-modification timestamp (ISO string). */
  updated_on?: string;

  /** NOTE(review): not set by any parser visible in this file; confirm intended use with the caller. */
  import_id?: string;

  /** NOTE(review): not set by any parser visible in this file; confirm intended use with the caller. */
  external_id?: string;

  /** Entry category code; the parsers in this file emit 'note', 'log' and 'review'. */
  type_code?: string;

  /** Name of the file the entry was parsed from. */
  original_filename: string;
}
/**
 * Standard Parser
 * - Treats the whole file as one entry.
 * - Title: a leading `# ` heading, or a short (< 60 chars) first line; otherwise
 *   the file name without its `.md` / `.txt` extension.
 * - A leading frontmatter block (`---` ... `---`) is removed from the content and
 *   its `title:` / `tags:` lines are applied (very basic, line-based parsing).
 * - Line endings are normalized to `\n` (CRLF input is accepted).
 *
 * @param file - Source file; only `name` and `lastModified` are read.
 * @param text - Full text of the file.
 * @returns A one-element array holding the parsed entry (`type_code: 'note'`).
 */
export async function parse_standard_note(file: File, text: string): Promise<AeJournalEntryInput[]> {
  // Accept both LF and CRLF so no line carries a stray '\r'.
  const lines = text.split(/\r?\n/);
  const firstLine = lines[0] ?? '';
  const firstTrimmed = firstLine.trim();
  const stripQuotes = (value: string): string => value.trim().replace(/^['"]|['"]$/g, '');

  // Fallback title: file name without its extension.
  let name = file.name.replace(/\.md$/i, '').replace(/\.txt$/i, '');
  let content = lines.join('\n');
  const tags: string[] = [];

  if (firstTrimmed === '---') {
    // Basic frontmatter (YAML style). The closing delimiter is matched after
    // trimming, exactly like the opening one, so trailing whitespace is tolerated.
    const endFrontmatter = lines.findIndex((line, index) => index > 0 && line.trim() === '---');
    if (endFrontmatter > -1) {
      content = lines.slice(endFrontmatter + 1).join('\n').trim();
      for (const line of lines.slice(1, endFrontmatter)) {
        if (line.startsWith('title:')) {
          const title = stripQuotes(line.substring(6));
          // An empty `title:` must not wipe out the file-name fallback.
          if (title) name = title;
        } else if (line.startsWith('tags:')) {
          // Brittle by design: only inline forms `[a, b]` or `a, b` are understood.
          const tagPart = line.substring(5).trim();
          const inner =
            tagPart.startsWith('[') && tagPart.endsWith(']')
              ? tagPart.substring(1, tagPart.length - 1)
              : tagPart;
          for (const piece of inner.split(',')) {
            const tag = stripQuotes(piece);
            // Skip blanks (e.g. a bare `tags:` line or a trailing comma).
            if (tag) tags.push(tag);
          }
        }
      }
    }
    // Unclosed frontmatter: keep the whole text as content, file name as title.
  } else if (firstLine.startsWith('# ')) {
    // Heuristic: a first-line header becomes the name.
    const heading = firstLine.substring(2).trim();
    if (heading) name = heading;
    content = lines.slice(1).join('\n').trim();
  } else if (firstTrimmed.length > 0 && firstTrimmed.length < 60) {
    // A short first line is treated as the title.
    name = firstTrimmed;
    content = lines.slice(1).join('\n').trim();
  }

  // The File object exposes no creation time, so lastModified is used for both.
  const lastModified = new Date(file.lastModified).toISOString();
  return [
    {
      name,
      content,
      tags,
      updated_on: lastModified,
      created_on: lastModified,
      original_filename: file.name,
      type_code: 'note',
    },
  ];
}
/**
 * Personal Log Parser
 * - Splits the file into one entry per `## YYYY-MM-DD` heading; any text after
 *   the date on the heading line becomes part of the entry name.
 * - Each entry's content starts with the reconstructed heading, followed by the
 *   lines up to the next date heading.
 * - Text before the first date heading (preamble) is ignored.
 * - A file without any date heading is delegated to `parse_standard_note`.
 * - Both LF and CRLF line endings are accepted.
 *
 * @param file - Source file; only `name` and `lastModified` are read.
 * @param text - Full text of the file.
 * @returns One entry per date heading (`type_code: 'log'`, tagged `log`).
 */
export async function parse_personal_log(file: File, text: string): Promise<AeJournalEntryInput[]> {
  const dateRegex = /^##\s+(\d{4}-\d{2}-\d{2})(.*)$/;
  // Split on \r?\n: a trailing '\r' would stop `(.*)$` from matching, so CRLF
  // files would otherwise never be recognised as logs.
  const lines = text.split(/\r?\n/);

  const sections: { dateStr: string; extraTitle: string; body: string[] }[] = [];
  let current: { dateStr: string; extraTitle: string; body: string[] } | undefined;
  for (const line of lines) {
    const match = dateRegex.exec(line);
    if (match) {
      const [, dateStr = '', rest = ''] = match;
      current = { dateStr, extraTitle: rest.trim(), body: [] };
      sections.push(current);
    } else if (current) {
      current.body.push(line);
    }
    // else: preamble before the first date heading is intentionally dropped.
  }

  // No date headers at all: the whole file is just one entry.
  if (sections.length === 0) {
    return parse_standard_note(file, text);
  }

  const fileBaseName = file.name.replace(/\.md$/i, '').replace(/\.txt$/i, '');
  const updatedOn = new Date(file.lastModified).toISOString();

  return sections.map(({ dateStr, extraTitle, body }) => {
    // Put the heading back for context, without a trailing space when the
    // heading carried no extra title.
    const header = extraTitle ? `## ${dateStr} ${extraTitle}` : `## ${dateStr}`;
    return {
      name: extraTitle ? `${dateStr} - ${extraTitle}` : `${fileBaseName} - ${dateStr}`,
      content: [header, ...body].join('\n').trim(),
      tags: ['log'],
      // Noon on that day. NOTE(review): no timezone designator is attached,
      // unlike `updated_on` — confirm how the API interprets it.
      created_on: `${dateStr}T12:00:00`,
      updated_on: updatedOn,
      type_code: 'log',
      original_filename: file.name,
    };
  });
}
/**
 * Amazon Vine Review Parser (from Python logic)
 * - Splits the file into one entry per `## Product Name` heading.
 * - Within each section, looks for a `* http...` link line and a
 *   `### Review Title` line (see `format_vine_entry`).
 * - Lines before the first product heading are ignored; a file without any
 *   product heading yields an empty array.
 * - Both LF and CRLF line endings are accepted.
 *
 * @param file - Source file; only `name` and `lastModified` are read.
 * @param text - Full text of the file.
 * @returns One entry per product heading (`type_code: 'review'`).
 */
export async function parse_amazon_vine(file: File, text: string): Promise<AeJournalEntryInput[]> {
  const productHeaderRegex = /^##\s+(.+)$/;
  const sections: { productName: string; body: string[] }[] = [];

  // Split on \r?\n: a trailing '\r' would stop `(.+)$` from matching, so CRLF
  // files would otherwise produce no entries at all.
  for (const line of text.split(/\r?\n/)) {
    const match = productHeaderRegex.exec(line);
    if (match) {
      sections.push({ productName: (match[1] ?? '').trim(), body: [] });
    } else {
      // Belongs to the current product, if one has been opened yet.
      sections[sections.length - 1]?.body.push(line);
    }
  }

  return sections.map((section) => format_vine_entry(section, section.body, file));
}

/**
 * Builds the journal entry for one product section.
 *
 * The first `* http...` bullet is taken as the product URL and the first
 * `### ...` line as the review title; both lines are removed from the body.
 * The content is assembled as `# title`, body, `**Product Link:** url`,
 * separated by blank lines, skipping whichever parts are absent.
 *
 * @param entry - Section descriptor carrying the product name.
 * @param bodyLines - Lines of the section, excluding the product heading.
 * @param file - Source file; only `name` and `lastModified` are read.
 * @returns The formatted entry, tagged `amazon`, `vine`, `review`.
 */
function format_vine_entry(
  entry: { productName: string },
  bodyLines: string[],
  file: File
): AeJournalEntryInput {
  // Bullet followed by the first whitespace-delimited token starting with "http".
  const linkRegex = /^\*\s+(http\S*)/;
  let url = '';
  let reviewTitle = '';
  const cleanBody: string[] = [];

  for (const line of bodyLines) {
    const trimmed = line.trim();
    if (!url) {
      const linkMatch = linkRegex.exec(trimmed);
      if (linkMatch) {
        url = linkMatch[1] ?? '';
        continue;
      }
    }
    if (!reviewTitle && trimmed.startsWith('### ')) {
      reviewTitle = trimmed.substring(4).trim();
      continue;
    }
    cleanBody.push(line);
  }

  // Join only the parts that exist so a missing title/body/link never leaves
  // stray leading or trailing blank lines in the content.
  const content = [
    reviewTitle ? `# ${reviewTitle}` : '',
    cleanBody.join('\n').trim(),
    url ? `**Product Link:** ${url}` : '',
  ]
    .filter((part) => part.length > 0)
    .join('\n\n');

  // The File object exposes no creation time, so lastModified is used for both.
  const lastModified = new Date(file.lastModified).toISOString();
  return {
    name: entry.productName,
    content,
    tags: ['amazon', 'vine', 'review'],
    created_on: lastModified,
    updated_on: lastModified,
    original_filename: file.name,
    type_code: 'review',
  };
}
/**
 * Registry of the available import parsing strategies, keyed by strategy id.
 * Every parser takes `(file, text)` and resolves to an array of journal entries.
 */
export const PARSERS = {
  standard: parse_standard_note,
  personal_log: parse_personal_log,
  amazon_vine: parse_amazon_vine,
};