2025-07-23 21:06:39 +02:00

332 lines
9.5 KiB
TypeScript

// src/utils/markdown.ts
// Simple markdown parser for client-side preview functionality
// Note: For production, consider using a proper markdown library like marked or markdown-it
/**
 * Options accepted by {@link SimpleMarkdownParser}.
 */
export interface MarkdownParseOptions {
  /**
   * When `true` (the default) the rendered HTML is run through the built-in
   * allowlist sanitizer and unsafe link/image URLs are replaced with `#`.
   */
  sanitize?: boolean;
  /**
   * When `true` (the default) blank-line separated blocks are wrapped in
   * `<p>` and single newlines inside them become `<br>`. When `false` no
   * paragraph or line-break markup is generated at all.
   */
  breaks?: boolean;
  /**
   * Value of the `target` attribute put on generated links (default
   * `'_blank'`). An empty string omits the `target`/`rel` attributes.
   */
  linkTarget?: string;
}

/**
 * Small regex-based markdown renderer intended for previews.
 *
 * Implementation note: HTML that must not be touched by later passes (code,
 * generated `<a>`/`<img>` tags) is parked in a per-call `fragments` array and
 * replaced by a token of the form `NUL + kind + index + NUL`, where kind is
 * `B` (block-level) or `I` (inline). NUL characters are stripped from the
 * input first, so user text can never forge a token. Tokens are swapped back
 * in as the very last step of {@link SimpleMarkdownParser.parse}.
 */
export class SimpleMarkdownParser {
  /** Tags that survive sanitization; any other tag is rendered as text. */
  private static readonly ALLOWED_TAGS: ReadonlySet<string> = new Set([
    'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
    'p', 'br', 'strong', 'em', 'code', 'pre',
    'a', 'img', 'ul', 'ol', 'li', 'blockquote', 'hr',
  ]);

  /** Attributes kept on raw (user-authored) allowed tags. */
  private static readonly ALLOWED_ATTRIBUTES: ReadonlySet<string> = new Set([
    'href', 'src', 'alt', 'title',
  ]);

  /** URL schemes accepted when sanitizing; scheme-less URLs are always accepted. */
  private static readonly SAFE_URL_SCHEMES: ReadonlySet<string> = new Set([
    'http', 'https', 'mailto', 'tel',
  ]);

  /** Matches text that starts with a block-level opening tag. */
  private static readonly BLOCK_START =
    /^<(?:address|article|aside|blockquote|details|div|dl|fieldset|figure|footer|form|h[1-6]|header|hr|li|main|nav|ol|p|pre|section|table|ul)\b/i;

  private static readonly HTML_ENTITIES: Readonly<Record<string, string>> = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;',
  };

  private readonly options: Required<MarkdownParseOptions>;

  /**
   * @param options Rendering options. A property that is missing or
   *   explicitly `undefined` falls back to its default, so
   *   `{ sanitize: undefined }` keeps sanitization enabled.
   */
  constructor(options: MarkdownParseOptions = {}) {
    this.options = {
      sanitize: options.sanitize ?? true,
      breaks: options.breaks ?? true,
      linkTarget: options.linkTarget ?? '_blank',
    };
  }

  /**
   * Parse markdown to HTML.
   *
   * @param markdown Markdown source.
   * @returns The rendered HTML, or `''` for empty / whitespace-only input.
   */
  parse(markdown: string): string {
    if (!markdown || markdown.trim().length === 0) {
      return '';
    }
    const fragments: string[] = [];
    // NUL is reserved for placeholder tokens; CRLF/CR would defeat the
    // line-anchored patterns below.
    let html = markdown.replace(/\u0000/g, '').replace(/\r\n?/g, '\n');

    // Code goes first so that nothing inside it is interpreted as markdown.
    html = this.parseCodeBlocks(html, fragments);
    html = this.parseInlineCode(html, fragments);
    html = this.parseHeaders(html);
    // Rules must precede emphasis, otherwise `***` is consumed as emphasis.
    html = this.parseHorizontalRules(html);
    // Links precede emphasis so `_`/`*` inside URLs are already parked.
    html = this.parseLinksAndImages(html, fragments);
    html = this.parseEmphasis(html);
    html = this.parseLists(html);
    html = this.parseBlockquotes(html);
    html = this.parseLineBreaks(html);

    if (this.options.sanitize) {
      html = this.sanitizeHtml(html);
    }
    // Parked fragments are generated by this class with escaped content, so
    // they are restored after sanitization and keep their own attributes.
    return this.restoreFragments(html, fragments).trim();
  }

  /** Parks `fragment` and returns the token that stands in for it. */
  private stash(fragments: string[], fragment: string, kind: 'B' | 'I'): string {
    fragments.push(fragment);
    return `\u0000${kind}${fragments.length - 1}\u0000`;
  }

  /** Replaces every token with the fragment it stands for. */
  private restoreFragments(html: string, fragments: readonly string[]): string {
    // A function replacer is used so `$&`, `$1`, ... inside code are not
    // interpreted as replacement patterns.
    return html.replace(
      /\u0000[BI](\d+)\u0000/g,
      (_token: string, index: string) => fragments[Number(index)] ?? ''
    );
  }

  /** Parks fenced blocks as escaped `<pre><code>` fragments. */
  private parseCodeBlocks(html: string, fragments: string[]): string {
    return html.replace(/```([\s\S]*?)```/g, (_match: string, code: string) => {
      const firstBreak = code.indexOf('\n');
      // Only a multi-line fence has an info string; for a one-line fence the
      // whole text is code, not a language name.
      const lang = firstBreak === -1 ? '' : code.slice(0, firstBreak).trim();
      const body = firstBreak === -1
        ? code.trim()
        : code.slice(firstBreak + 1).replace(/\s+$/, ''); // keep leading indentation
      const classAttr = lang ? ` class="language-${this.escapeHtml(lang)}"` : '';
      return this.stash(
        fragments,
        `<pre><code${classAttr}>${this.escapeHtml(body)}</code></pre>`,
        'B'
      );
    });
  }

  /** Converts `# ...` to `###### ...` lines into `<h1>`-`<h6>`. */
  private parseHeaders(html: string): string {
    return html.replace(
      /^(#{1,6})[ \t]+(.+)$/gm,
      (_match: string, hashes: string, text: string) =>
        `<h${hashes.length}>${text.trim()}</h${hashes.length}>`
    );
  }

  /**
   * Shared emphasis matcher used by both the HTML and the plain-text path.
   * Delimiters must hug their content (`* a *` is not emphasis) and `_`
   * delimiters must not touch a word character on the outside, so
   * identifiers such as `snake_case_name` are left alone.
   */
  private rewriteEmphasis(
    text: string,
    render: (kind: 'both' | 'strong' | 'em', content: string) => string
  ): string {
    return text
      .replace(/\*\*\*([^\s*](?:.*?[^\s*])?)\*\*\*/g,
        (_m: string, content: string) => render('both', content))
      .replace(/\*\*([^\s*](?:.*?[^\s*])?)\*\*/g,
        (_m: string, content: string) => render('strong', content))
      .replace(/(^|[^\w])__([^\s_](?:.*?[^\s_])?)__(?![\w])/gm,
        (_m: string, lead: string, content: string) => lead + render('strong', content))
      .replace(/\*([^\s*](?:.*?[^\s*])?)\*/g,
        (_m: string, content: string) => render('em', content))
      .replace(/(^|[^\w])_([^\s_](?:.*?[^\s_])?)_(?![\w])/gm,
        (_m: string, lead: string, content: string) => lead + render('em', content));
  }

  /** Bold (`**x**`, `__x__`), italic (`*x*`, `_x_`) and `***x***`. */
  private parseEmphasis(html: string): string {
    return this.rewriteEmphasis(html, (kind, content) =>
      kind === 'both'
        ? `<strong><em>${content}</em></strong>`
        : `<${kind}>${content}</${kind}>`
    );
  }

  /**
   * Converts `![alt](src)` and `[text](url)`. The generated `<img>` tag and
   * `<a ...>` opening tag are parked so later passes cannot alter their
   * attributes; link text stays in the stream and may contain emphasis.
   */
  private parseLinksAndImages(html: string, fragments: string[]): string {
    const linkTarget = this.options.linkTarget
      ? ` target="${this.escapeHtml(this.options.linkTarget)}" rel="noopener noreferrer"`
      : '';
    // Images: ![alt](src)
    html = html.replace(/!\[([^\]]*)\]\(([^)]*)\)/g, (_m: string, alt: string, src: string) =>
      this.stash(
        fragments,
        `<img src="${this.escapeHtml(this.safeUrl(src))}" alt="${this.escapeHtml(this.stripTokens(alt))}" style="max-width: 100%; height: auto; border-radius: 0.25rem; margin: 0.5rem 0;" />`,
        'I'
      ));
    // Links: [text](url)
    html = html.replace(/\[([^\]]*)\]\(([^)]*)\)/g, (_m: string, text: string, url: string) =>
      `${this.stash(fragments, `<a href="${this.escapeHtml(this.safeUrl(url))}"${linkTarget}>`, 'I')}${text}</a>`);
    return html;
  }

  /** Removes placeholder tokens from text destined for an attribute value. */
  private stripTokens(value: string): string {
    return value.replace(/\u0000[BI]\d+\u0000/g, '');
  }

  /** Returns `url`, or `#` when sanitizing and the URL scheme is not allowed. */
  private safeUrl(url: string): string {
    const plain = this.stripTokens(url);
    return this.options.sanitize && !this.isSafeUrl(plain) ? '#' : plain;
  }

  /**
   * A URL is safe when it has no scheme (relative URL / fragment) or its
   * scheme is in the allowlist. Anything before the first `/`, `?` or `#`
   * that contains `:` is treated as a scheme.
   */
  private isSafeUrl(url: string): boolean {
    const candidate = url.trim();
    const boundary = candidate.search(/[\/?#]/);
    const head = boundary === -1 ? candidate : candidate.slice(0, boundary);
    const colon = head.indexOf(':');
    if (colon === -1) {
      return true;
    }
    return SimpleMarkdownParser.SAFE_URL_SCHEMES.has(head.slice(0, colon).toLowerCase());
  }

  /** Parks single-line `` `code` `` spans as escaped `<code>` fragments. */
  private parseInlineCode(html: string, fragments: string[]): string {
    return html.replace(/`([^`\n]+)`/g, (_match: string, code: string) =>
      this.stash(fragments, `<code>${this.escapeHtml(code)}</code>`, 'I'));
  }

  /**
   * Groups runs of consecutive list lines into one single-line `<ul>` (for
   * `-`, `*`, `+` markers) or `<ol>` (for `1.` markers). Nesting is not
   * supported; indentation before the marker is ignored.
   */
  private parseLists(html: string): string {
    const output: string[] = [];
    let tag = ''; // '' while outside a list
    let items: string[] = [];
    for (const line of html.split('\n')) {
      const bullet = /^\s*[-*+]\s+(.+)$/.exec(line);
      const numbered = bullet === null ? /^\s*\d+\.\s+(.+)$/.exec(line) : null;
      const item = bullet ?? numbered;
      const lineTag = bullet !== null ? 'ul' : numbered !== null ? 'ol' : '';
      if (tag !== '' && lineTag !== tag) {
        // The run ended (or switched list type): emit what was collected.
        output.push(`<${tag}>${items.join('')}</${tag}>`);
        items = [];
      }
      tag = lineTag;
      if (item === null) {
        output.push(line);
      } else {
        items.push(`<li>${item[1]}</li>`);
      }
    }
    if (tag !== '') {
      output.push(`<${tag}>${items.join('')}</${tag}>`);
    }
    return output.join('\n');
  }

  /** Converts `> text` lines; directly adjacent quote lines are merged. */
  private parseBlockquotes(html: string): string {
    html = html.replace(/^>[ \t]*(.+)$/gm, '<blockquote>$1</blockquote>');
    // Only lines separated by exactly one newline belong to the same quote.
    return html.replace(/<\/blockquote>\n<blockquote>/g, ' ');
  }

  /** Converts lines made only of `---`, `***` or `___` (3+ chars) to `<hr>`. */
  private parseHorizontalRules(html: string): string {
    return html.replace(/^(?:-{3,}|\*{3,}|_{3,})[ \t]*$/gm, '<hr>');
  }

  /**
   * Wraps blank-line separated text in `<p>` and turns single newlines into
   * `<br>`. Does nothing when the `breaks` option is off.
   */
  private parseLineBreaks(html: string): string {
    if (!this.options.breaks) {
      return html;
    }
    // Give every generated single-line block (and every parked code block)
    // a paragraph of its own, so e.g. a header directly followed by text
    // does not end up as `<h1>..</h1><br>text`.
    const isolated = html.replace(
      /^(?:<(h[1-6]|ul|ol|blockquote)>.*<\/\1>|<hr>|\u0000B\d+\u0000)$/gm,
      '\n\n$&\n\n'
    );
    const blocks: string[] = [];
    for (const paragraph of isolated.split(/\n\s*\n/)) {
      const trimmed = paragraph.trim();
      if (!trimmed) {
        continue;
      }
      if (/^\u0000B\d+\u0000$/.test(trimmed)) {
        blocks.push(trimmed);
      } else if (this.isBlockElement(trimmed)) {
        // Block-level HTML is never wrapped in <p>; a complete element is
        // left untouched, otherwise newlines still become <br>.
        blocks.push(trimmed.endsWith('>') ? trimmed : trimmed.replace(/\n/g, '<br>'));
      } else {
        blocks.push(`<p>${trimmed.replace(/\n/g, '<br>')}</p>`);
      }
    }
    return blocks.join('\n\n');
  }

  /** True when `html` starts with a block-level opening tag. */
  private isBlockElement(html: string): boolean {
    return SimpleMarkdownParser.BLOCK_START.test(html);
  }

  /**
   * Allowlist sanitizer. `<script>` elements are dropped with their content;
   * after that every `<` either starts a tag that is rebuilt from an allowed
   * name plus allowed, re-quoted attributes, or is escaped to `&lt;`.
   * Still basic - for production use a proper library like DOMPurify.
   */
  private sanitizeHtml(html: string): string {
    const withoutScripts = html.replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '');
    return withoutScripts.replace(
      /<(\/?)([a-zA-Z][a-zA-Z0-9]*)\b([^<>]*)>|</g,
      (match: string, slash: string | undefined, name: string | undefined, attrs: string | undefined) => {
        if (name === undefined) {
          return '&lt;'; // a '<' that does not start a well-formed tag
        }
        const tag = name.toLowerCase();
        const rawAttrs = attrs ?? '';
        // A placeholder token inside a tag would be expanded into the
        // attribute area later on, so such tags are never passed through.
        if (!SimpleMarkdownParser.ALLOWED_TAGS.has(tag) || rawAttrs.includes('\u0000')) {
          return `&lt;${match.slice(1)}`;
        }
        return slash ? `</${tag}>` : `<${tag}${this.sanitizeAttributes(rawAttrs)}>`;
      }
    );
  }

  /**
   * Rebuilds the attribute list of a raw tag: only quoted, allowlisted
   * attributes are kept, and `href`/`src` must pass the URL check.
   */
  private sanitizeAttributes(attrs: string): string {
    const pattern = /([a-zA-Z][\w-]*)\s*=\s*(?:"([^"]*)"|'([^']*)')/g;
    let rebuilt = '';
    let found: RegExpExecArray | null;
    while ((found = pattern.exec(attrs)) !== null) {
      const attrName = (found[1] ?? '').toLowerCase();
      const value = found[2] ?? found[3] ?? '';
      if (!SimpleMarkdownParser.ALLOWED_ATTRIBUTES.has(attrName)) {
        continue;
      }
      if (attrName === 'href' || attrName === 'src') {
        // The value is already HTML-encoded, so a character reference in
        // the scheme position (e.g. `javascript&#58;`) could hide a colon.
        if (!this.isSafeUrl(value) || /^[^\/?#]*&/.test(value)) {
          continue;
        }
      }
      rebuilt += ` ${attrName}="${value.replace(/"/g, '&quot;')}"`;
    }
    return rebuilt;
  }

  /** Escapes `& < > " '` for use in HTML text and attribute values (no DOM needed). */
  private escapeHtml(text: string): string {
    return text.replace(/[&<>"']/g, ch => SimpleMarkdownParser.HTML_ENTITIES[ch] ?? ch);
  }

  /**
   * Extract plain text from markdown (for word/character counting).
   * Code blocks, inline code and images are removed entirely; link text is
   * kept. Whitespace is collapsed to single spaces.
   */
  extractText(markdown: string): string {
    let text = markdown;
    // Remove code blocks
    text = text.replace(/```[\s\S]*?```/g, '');
    // Remove inline code
    text = text.replace(/`[^`]*`/g, '');
    // Remove images
    text = text.replace(/!\[[^\]]*\]\([^)]*\)/g, '');
    // Remove links but keep text
    text = text.replace(/\[([^\]]*)\]\([^)]*\)/g, '$1');
    // Remove headers
    text = text.replace(/^#{1,6}\s+/gm, '');
    // Remove horizontal rules, blockquote and list markers before emphasis,
    // so their `*` / `_` characters are not mistaken for emphasis delimiters
    text = text.replace(/^(?:-{3,}|\*{3,}|_{3,})[ \t]*$/gm, '');
    text = text.replace(/^>\s+/gm, '');
    text = text.replace(/^[\s]*[-*+]\s+/gm, '');
    text = text.replace(/^[\s]*\d+\.\s+/gm, '');
    // Remove emphasis delimiters, keep the emphasised text
    text = this.rewriteEmphasis(text, (_kind, content) => content);
    // Clean up whitespace
    return text.replace(/\s+/g, ' ').trim();
  }

  /**
   * Count words in markdown text
   */
  countWords(markdown: string): number {
    const plainText = this.extractText(markdown);
    if (!plainText) return 0;
    return plainText.split(/\s+/).length;
  }

  /**
   * Count characters in markdown text
   */
  countCharacters(markdown: string): number {
    return this.extractText(markdown).length;
  }

  /**
   * Generate table of contents from headers.
   *
   * Lines inside fenced code blocks are skipped. The anchor is the header
   * text lower-cased, reduced to `a-z`, `0-9` and `-`, with whitespace runs
   * turned into single dashes; anchors are not de-duplicated.
   */
  generateTOC(markdown: string): Array<{level: number, text: string, anchor: string}> {
    const headers: Array<{level: number, text: string, anchor: string}> = [];
    let insideFence = false;
    for (const line of markdown.split(/\r?\n/)) {
      const fenceCount = line.split('```').length - 1;
      if (fenceCount > 0) {
        // An odd number of fence markers on a line opens or closes a block.
        if (fenceCount % 2 === 1) {
          insideFence = !insideFence;
        }
        continue;
      }
      if (insideFence) {
        continue;
      }
      const headerMatch = /^(#{1,6})\s+(.+)$/.exec(line);
      if (headerMatch === null) {
        continue;
      }
      const level = (headerMatch[1] ?? '').length;
      const text = (headerMatch[2] ?? '').trim();
      const anchor = text.toLowerCase()
        .replace(/[^a-z0-9\s-]/g, '')
        .replace(/\s+/g, '-')
        .replace(/-+/g, '-')
        .replace(/^-|-$/g, '');
      headers.push({ level, text, anchor });
    }
    return headers;
  }
}
// Convenience functions for global use
/**
 * One-shot helper: renders `markdown` to HTML with a freshly constructed
 * SimpleMarkdownParser.
 *
 * @param markdown Markdown source to render.
 * @param options Optional parser options, forwarded to the constructor.
 * @returns Whatever `SimpleMarkdownParser.parse` returns for the input.
 */
export function parseMarkdown(markdown: string, options?: MarkdownParseOptions): string {
  return new SimpleMarkdownParser(options).parse(markdown);
}
/**
 * One-shot helper: returns the plain text of `markdown` using a
 * SimpleMarkdownParser constructed without options.
 *
 * @param markdown Markdown source.
 * @returns Whatever `SimpleMarkdownParser.extractText` returns for the input.
 */
export function extractTextFromMarkdown(markdown: string): string {
  return new SimpleMarkdownParser().extractText(markdown);
}
/**
 * One-shot helper: counts the words of `markdown` using a
 * SimpleMarkdownParser constructed without options.
 *
 * @param markdown Markdown source.
 * @returns Whatever `SimpleMarkdownParser.countWords` returns for the input.
 */
export function countWordsInMarkdown(markdown: string): number {
  return new SimpleMarkdownParser().countWords(markdown);
}
/**
 * One-shot helper: counts the characters of `markdown` using a
 * SimpleMarkdownParser constructed without options.
 *
 * @param markdown Markdown source.
 * @returns Whatever `SimpleMarkdownParser.countCharacters` returns for the input.
 */
export function countCharactersInMarkdown(markdown: string): number {
  return new SimpleMarkdownParser().countCharacters(markdown);
}
/**
 * One-shot helper: builds the table of contents of `markdown` using a
 * SimpleMarkdownParser constructed without options.
 *
 * @param markdown Markdown source.
 * @returns Whatever `SimpleMarkdownParser.generateTOC` returns for the input:
 *   an array of `{ level, text, anchor }` entries.
 */
export function generateMarkdownTOC(markdown: string): Array<{level: number, text: string, anchor: string}> {
  return new SimpleMarkdownParser().generateTOC(markdown);
}