// src/utils/markdown.ts
// Simple markdown parser for client-side preview functionality
// Note: For production, consider using a proper markdown library like marked or markdown-it

/** Options accepted by {@link SimpleMarkdownParser} and {@link parseMarkdown}. */
export interface MarkdownParseOptions {
  /**
   * Strip `<script>` elements, inline `on*=` handlers and `javascript:` URLs
   * from the output. Defaults to `true`; an explicit `undefined` keeps the default.
   */
  sanitize?: boolean;
  /**
   * When `true` (default) blank-line separated text is wrapped in `<p>` and
   * single newlines become `<br>`. When `false` no paragraph/line-break markup
   * is generated at all.
   */
  breaks?: boolean;
  /** Value for the `target` attribute of generated links (default `_blank`); a falsy value omits it. */
  linkTarget?: string;
}

/** One entry of the table of contents produced by `generateTOC`. */
type TocEntry = { level: number, text: string, anchor: string };

// Delimiter for placeholder tokens. NUL cannot legitimately appear in markdown
// text and contains no markdown syntax, so later passes never rewrite a token.
const TOKEN_MARK = '\u0000';
// eslint-disable-next-line no-control-regex
const TOKEN_PATTERN = /\u0000(\d+)\u0000/g;
// eslint-disable-next-line no-control-regex
const SINGLE_TOKEN_PATTERN = /^\u0000(\d+)\u0000$/;

// Tags that must not be wrapped in <p>. `\b` prevents `<p` from matching `<param>` etc.
const BLOCK_TAG_PATTERN = /^<\/?(?:h[1-6]|p|div|ul|ol|li|blockquote|pre|hr)\b/i;

const FENCED_CODE_PATTERN = /```([\s\S]*?)```/g;
const IMAGE_PATTERN = /!\[([^\]]*)\]\(([^)]*)\)/g;
const LINK_PATTERN = /\[([^\]]*)\]\(([^)]*)\)/g;

// Emphasis delimiters must hug non-space text so list bullets and stray
// asterisks ("2 * 3 * 4") are left alone. The underscore forms additionally
// require a non-word neighbour so snake_case identifiers are not mangled.
const BOLD_ITALIC_STAR = /\*\*\*(?=\S)(.+?)(?<=\S)\*\*\*/g;
const BOLD_STAR = /\*\*(?=\S)(.+?)(?<=\S)\*\*/g;
const BOLD_UNDERSCORE = /(?<![A-Za-z0-9_])__(?=\S)(.+?)(?<=\S)__(?![A-Za-z0-9_])/g;
const ITALIC_STAR = /\*(?=\S)(.+?)(?<=\S)\*/g;
const ITALIC_UNDERSCORE = /(?<![A-Za-z0-9_])_(?=\S)(.+?)(?<=\S)_(?![A-Za-z0-9_])/g;

const HTML_ESCAPES: Readonly<Record<string, string>> = {
  '&': '&amp;',
  '<': '&lt;',
  '>': '&gt;',
  '"': '&quot;',
  "'": '&#39;',
};

/**
 * Minimal regex-based markdown renderer intended for live previews.
 *
 * Supported syntax: fenced and inline code, ATX headers, bold/italic, links,
 * images, flat ordered/unordered lists, blockquotes and horizontal rules.
 * Raw HTML in the input is passed through (minus what `sanitize` removes).
 */
export class SimpleMarkdownParser {
  private options: MarkdownParseOptions;

  /**
   * @param options Overrides for the defaults
   *   `{ sanitize: true, breaks: true, linkTarget: '_blank' }`.
   */
  constructor(options: MarkdownParseOptions = {}) {
    this.options = {
      linkTarget: '_blank',
      ...options,
      // An explicit `undefined` must not silently switch these off.
      sanitize: options.sanitize ?? true,
      breaks: options.breaks ?? true,
    };
  }

  /**
   * Parse markdown to HTML.
   *
   * @param markdown Markdown source.
   * @returns The rendered HTML, or `''` for empty / whitespace-only input.
   */
  parse(markdown: string): string {
    if (!markdown || markdown.trim().length === 0) {
      return '';
    }

    // Generated fragments that later passes must not touch (escaped code,
    // tags carrying URLs). They are swapped for tokens and restored last.
    const chunks: string[] = [];

    // Normalise line endings and drop NULs so input cannot forge a token.
    let html = markdown.replace(/\r\n?/g, '\n').split(TOKEN_MARK).join('');

    // Code first, so nothing inside it is interpreted as markdown.
    html = this.parseCodeBlocks(html, chunks);
    html = this.parseInlineCode(html, chunks);

    // Block-level constructs before inline ones: a leading "* " must be
    // recognised as a bullet (and "***" as a rule) before emphasis runs.
    html = this.parseHeaders(html);
    html = this.parseHorizontalRules(html);
    html = this.parseLists(html);
    html = this.parseBlockquotes(html);

    // Links before emphasis so underscores/asterisks in URLs stay intact.
    html = this.parseLinksAndImages(html, chunks);
    html = this.parseEmphasis(html);

    html = this.parseLineBreaks(html, chunks);

    // Sanitize while generated fragments are still tokens: they are safe by
    // construction (escaped), and code samples must not be altered.
    if (this.options.sanitize) {
      html = this.sanitizeHtml(html);
    }

    return this.restoreChunks(html, chunks).trim();
  }

  /** Store a generated fragment and return the token that stands in for it. */
  private stash(chunks: string[], fragment: string): string {
    chunks.push(fragment);
    return `${TOKEN_MARK}${chunks.length - 1}${TOKEN_MARK}`;
  }

  /** Replace every token with its stored fragment. */
  private restoreChunks(html: string, chunks: readonly string[]): string {
    let restored = html;
    // Fragments may nest (inline code inside image alt text). A fragment can
    // only reference earlier ones, so `chunks.length` passes always suffice.
    for (let pass = 0; pass <= chunks.length && restored.includes(TOKEN_MARK); pass++) {
      // Callback form: a replacement *string* would interpret `$&`, `$$`, ...
      // occurring inside code samples.
      restored = restored.replace(TOKEN_PATTERN, (_match: string, index: string) => chunks[Number(index)] ?? '');
    }
    return restored;
  }

  /** Convert fenced code blocks into escaped `<pre><code>` fragments. */
  private parseCodeBlocks(html: string, chunks: string[]): string {
    return html.replace(FENCED_CODE_PATTERN, (_match: string, code: string) => {
      const newlineAt = code.indexOf('\n');
      let classAttr = '';
      let content: string;

      if (newlineAt === -1) {
        // One-line fence: there is no info string, everything is code.
        content = code.trim();
      } else {
        const lang = code.slice(0, newlineAt).trim().split(/\s+/)[0] ?? '';
        // Only emit identifier-like language names into the attribute.
        if (/^[\w+#.-]+$/.test(lang)) {
          classAttr = ` class="language-${lang}"`;
        }
        // Drop surrounding blank lines but keep the first line's indentation.
        content = code.slice(newlineAt + 1).replace(/^\s*\n|\s+$/g, '');
      }

      const token = this.stash(chunks, `<pre><code${classAttr}>${this.escapeHtml(content)}</code></pre>`);
      // Blank lines make the block its own paragraph for parseLineBreaks.
      return `\n\n${token}\n\n`;
    });
  }

  /** Convert single-line `` `code` `` spans into escaped `<code>` fragments. */
  private parseInlineCode(html: string, chunks: string[]): string {
    return html.replace(/`([^`\n]+)`/g, (_match: string, code: string) =>
      this.stash(chunks, `<code>${this.escapeHtml(code)}</code>`));
  }

  /** Convert ATX headers (`#` .. `######`) into `<h1>` .. `<h6>`. */
  private parseHeaders(html: string): string {
    // `[ \t]+` rather than `\s+` so the match cannot run onto the next line.
    return html.replace(/^(#{1,6})[ \t]+(.+)$/gm, (_match: string, hashes: string, text: string) => {
      const level = hashes.length;
      return `\n\n<h${level}>${text.trim()}</h${level}>\n\n`;
    });
  }

  /** Convert a line consisting of three or more `-` / `*` into `<hr>`. */
  private parseHorizontalRules(html: string): string {
    return html.replace(/^[-*]{3,}[ \t]*$/gm, '\n\n<hr>\n\n');
  }

  /**
   * Group consecutive list lines into `<ul>` (`-`, `*`, `+` markers) or `<ol>`
   * (`1.` markers). Nesting by indentation is not supported.
   */
  private parseLists(html: string): string {
    const output: string[] = [];
    let listTag: 'ul' | 'ol' | null = null;
    let items: string[] = [];

    const closeList = (): void => {
      if (listTag !== null) {
        // Emitted on one line, fenced by blank lines, so parseLineBreaks
        // neither injects <br> into the list nor glues it to a paragraph.
        output.push('', `<${listTag}>${items.join('')}</${listTag}>`, '');
        listTag = null;
        items = [];
      }
    };

    for (const line of html.split('\n')) {
      const bullet = /^\s*[-*+]\s+(.+)$/.exec(line);
      const numbered = bullet ? null : /^\s*\d+\.\s+(.+)$/.exec(line);
      const match = bullet ?? numbered;

      if (!match) {
        closeList();
        output.push(line);
        continue;
      }

      const tag = bullet ? 'ul' : 'ol';
      if (listTag !== tag) {
        closeList();
      }
      listTag = tag;
      items.push(`<li>${match[1] ?? ''}</li>`);
    }
    closeList();

    return output.join('\n');
  }

  /** Convert `> text` lines into `<blockquote>`, merging directly adjacent lines. */
  private parseBlockquotes(html: string): string {
    return html
      .replace(/^>[ \t]+(.+)$/gm, '<blockquote>$1</blockquote>')
      // Only lines that are directly adjacent belong to the same quote.
      .replace(/<\/blockquote>\n<blockquote>/g, ' ')
      .replace(/^<blockquote>.*<\/blockquote>$/gm, '\n\n$&\n\n');
  }

  /** Convert `![alt](src)` into `<img>` and `[text](url)` into `<a>`. */
  private parseLinksAndImages(html: string, chunks: string[]): string {
    const linkTarget = this.options.linkTarget
      ? ` target="${this.escapeHtml(this.options.linkTarget)}" rel="noopener noreferrer"`
      : '';

    const withImages = html.replace(IMAGE_PATTERN, (_match: string, alt: string, src: string) =>
      this.stash(chunks, `<img src="${this.safeUrl(src)}" alt="${this.escapeHtml(alt)}">`));

    // Only the opening tag is stashed: the link text stays in the stream so
    // it still receives emphasis processing and sanitization.
    return withImages.replace(LINK_PATTERN, (_match: string, text: string, url: string) =>
      `${this.stash(chunks, `<a href="${this.safeUrl(url)}"${linkTarget}>`)}${text}</a>`);
  }

  /** Escape a URL for use in an attribute; script URLs become `#` when sanitizing. */
  private safeUrl(url: string): string {
    const trimmed = url.trim();
    if (this.options.sanitize) {
      // Browsers ignore whitespace/control characters inside the scheme.
      // eslint-disable-next-line no-control-regex
      const probe = trimmed.replace(/[\u0000-\u0020]+/g, '');
      if (/^(?:javascript|vbscript):/i.test(probe)) {
        return '#';
      }
    }
    return this.escapeHtml(trimmed);
  }

  /** Convert `***x***`, `**x**` / `__x__` and `*x*` / `_x_` into `<strong>` / `<em>`. */
  private parseEmphasis(html: string): string {
    return html
      .replace(BOLD_ITALIC_STAR, '<strong><em>$1</em></strong>')
      .replace(BOLD_STAR, '<strong>$1</strong>')
      .replace(BOLD_UNDERSCORE, '<strong>$1</strong>')
      .replace(ITALIC_STAR, '<em>$1</em>')
      .replace(ITALIC_UNDERSCORE, '<em>$1</em>');
  }

  /**
   * Wrap blank-line separated text in `<p>` and turn single newlines into
   * `<br>`. With `breaks: false` only surplus blank lines are collapsed.
   */
  private parseLineBreaks(html: string, chunks: readonly string[]): string {
    if (!this.options.breaks) {
      return html.replace(/\n{3,}/g, '\n\n');
    }

    return html
      .split(/\n\s*\n/)
      .map(paragraph => paragraph.trim())
      .filter(paragraph => paragraph.length > 0)
      .map(paragraph =>
        this.isBlockElement(paragraph) || this.isBlockToken(paragraph, chunks)
          ? paragraph
          : `<p>${paragraph.replace(/\n/g, '<br>')}</p>`)
      .join('\n\n');
  }

  /** True when the fragment starts with a block-level tag. */
  private isBlockElement(html: string): boolean {
    return BLOCK_TAG_PATTERN.test(html);
  }

  /** True when the fragment is exactly one token standing in for a `<pre>` block. */
  private isBlockToken(fragment: string, chunks: readonly string[]): boolean {
    const match = SINGLE_TOKEN_PATTERN.exec(fragment);
    return match !== null && (chunks[Number(match[1])] ?? '').startsWith('<pre');
  }

  /**
   * Very basic HTML sanitization: removes script elements, inline event
   * handlers and `javascript:` URLs.
   *
   * This is NOT a complete XSS defence - for production use a proper library
   * like DOMPurify.
   */
  private sanitizeHtml(html: string): string {
    let current = html;
    let previous: string;
    // Repeat until stable: one removal can splice a new match together
    // (e.g. "javajavascript:script:"). Every change shortens the string, so
    // the loop terminates.
    do {
      previous = current;
      current = previous
        .replace(/<script\b[^>]*>[\s\S]*?<\/script\s*>/gi, '')
        .replace(/<\/?script\b[^>]*>/gi, '')
        .replace(/\bon\w+\s*=\s*"[^"]*"/gi, '')
        .replace(/\bon\w+\s*=\s*'[^']*'/gi, '')
        // Unquoted handlers (`<img src=x onerror=run()>`), only inside tags.
        .replace(/<[^>]+>/g, tag => tag.replace(/\s+on\w+\s*=\s*[^\s>"']+/gi, ''))
        .replace(/javascript:/gi, '');
    } while (current !== previous);
    return current;
  }

  /** Escape HTML metacharacters without needing a DOM, so it also works under node/SSR. */
  private escapeHtml(text: string): string {
    return text.replace(/[&<>"']/g, ch => HTML_ESCAPES[ch] ?? ch);
  }

  /**
   * Extract plain text from markdown (for word/character counting).
   *
   * Code blocks, inline code and images are dropped entirely; links keep only
   * their text. Whitespace is collapsed to single spaces.
   *
   * @returns The plain text, or `''` for empty input.
   */
  extractText(markdown: string): string {
    if (!markdown) {
      return '';
    }

    return markdown
      .replace(/```[\s\S]*?```/g, '')
      .replace(/`[^`]*`/g, '')
      .replace(IMAGE_PATTERN, '')
      .replace(LINK_PATTERN, '$1')
      .replace(/^#{1,6}\s+/gm, '')
      .replace(/^>\s+/gm, '')
      // Rules and list markers go before emphasis so "* item" bullets and
      // "***" are not mistaken for emphasis delimiters.
      .replace(/^[-*]{3,}$/gm, '')
      .replace(/^[\s]*[-*+]\s+/gm, '')
      .replace(/^[\s]*\d+\.\s+/gm, '')
      .replace(BOLD_STAR, '$1')
      .replace(ITALIC_STAR, '$1')
      .replace(BOLD_UNDERSCORE, '$1')
      .replace(ITALIC_UNDERSCORE, '$1')
      .replace(/\s+/g, ' ')
      .trim();
  }

  /**
   * Count words in markdown text.
   *
   * @returns Number of whitespace-separated words in the extracted plain text.
   */
  countWords(markdown: string): number {
    const plainText = this.extractText(markdown);
    return plainText.length === 0 ? 0 : plainText.split(/\s+/).length;
  }

  /**
   * Count characters in markdown text.
   *
   * @returns Length of the extracted plain text (UTF-16 code units).
   */
  countCharacters(markdown: string): number {
    return this.extractText(markdown).length;
  }

  /**
   * Generate table of contents from headers.
   *
   * Lines inside fenced code blocks are ignored. The anchor is the lowercased
   * header text with everything except `a-z`, `0-9` and `-` removed and
   * whitespace turned into single dashes.
   */
  generateTOC(markdown: string): Array<{level: number, text: string, anchor: string}> {
    const headers: TocEntry[] = [];
    if (!markdown) {
      return headers;
    }

    // A "# comment" inside a code sample is not a document heading.
    const withoutCode = markdown.replace(/```[\s\S]*?```/g, '');

    for (const line of withoutCode.split(/\r?\n/)) {
      const headerMatch = /^(#{1,6})\s+(.+)$/.exec(line);
      if (!headerMatch) {
        continue;
      }

      const level = (headerMatch[1] ?? '').length;
      const text = (headerMatch[2] ?? '').trim();
      const anchor = text
        .toLowerCase()
        .replace(/[^a-z0-9\s-]/g, '')
        .replace(/\s+/g, '-')
        .replace(/-+/g, '-')
        .replace(/^-|-$/g, '');

      headers.push({ level, text, anchor });
    }

    return headers;
  }
}

// Convenience functions for global use

/** Render markdown to HTML with a one-off parser. */
export function parseMarkdown(markdown: string, options?: MarkdownParseOptions): string {
  return new SimpleMarkdownParser(options).parse(markdown);
}

/** Extract plain text from markdown with a default parser. */
export function extractTextFromMarkdown(markdown: string): string {
  return new SimpleMarkdownParser().extractText(markdown);
}

/** Count the words in markdown with a default parser. */
export function countWordsInMarkdown(markdown: string): number {
  return new SimpleMarkdownParser().countWords(markdown);
}

/** Count the plain-text characters in markdown with a default parser. */
export function countCharactersInMarkdown(markdown: string): number {
  return new SimpleMarkdownParser().countCharacters(markdown);
}

/** Build a table of contents from markdown headers with a default parser. */
export function generateMarkdownTOC(markdown: string): Array<{level: number, text: string, anchor: string}> {
  return new SimpleMarkdownParser().generateTOC(markdown);
}