// src/utils/markdown.ts
// Simple markdown parser for client-side preview functionality.
// Note: for production, consider using a proper markdown library such as marked or markdown-it.

/**
 * Settings accepted by `SimpleMarkdownParser` and `parseMarkdown`.
 * Every field is optional; the parser's constructor fills in defaults.
 */
export interface MarkdownParseOptions {
  /** Run the parser's sanitization step on the output. The constructor defaults this to `true`. */
  sanitize?: boolean;

  /**
   * Wrap blank-line separated text in `<p>` and turn single newlines into `<br>`.
   * When falsy that step is skipped. The constructor defaults this to `true`.
   */
  breaks?: boolean;

  /**
   * Value of the `target` attribute on generated links (emitted together with
   * `rel="noopener noreferrer"`). A falsy value omits both attributes.
   * The constructor defaults this to `'_blank'`.
   */
  linkTarget?: string;
}
|
|
|
|
/**
 * Small regex-based markdown-to-HTML converter intended for previews.
 *
 * Supported syntax: fenced and inline code, ATX headers, `*`/`_` emphasis,
 * links, images, flat ordered/unordered lists, single-level blockquotes and
 * `---`/`***` rules. Nested lists, link titles, tables and reference links are
 * not supported.
 *
 * The parser has no DOM dependency, so it can also run outside a browser.
 *
 * With `sanitize` enabled (the default) raw HTML in the source is escaped and
 * link/image URLs are restricted to a scheme allowlist, so the only tags in
 * the output are the ones this class generates. This is still a best-effort
 * measure; for untrusted production content prefer a vetted markdown library
 * plus a dedicated sanitizer.
 */
export class SimpleMarkdownParser {
  private options: MarkdownParseOptions;

  /**
   * @param options - Overrides for the defaults
   *   (`sanitize: true`, `breaks: true`, `linkTarget: '_blank'`).
   */
  constructor(options: MarkdownParseOptions = {}) {
    this.options = {
      sanitize: true,
      breaks: true,
      linkTarget: '_blank',
      ...options
    };
  }

  /**
   * Parse markdown to HTML.
   *
   * @param markdown - Markdown source text.
   * @returns The generated HTML, or `''` for empty / whitespace-only input.
   */
  parse(markdown: string): string {
    if (!markdown || markdown.trim().length === 0) {
      return '';
    }

    // Finished HTML fragments that later passes must not touch. They are
    // swapped for NUL-delimited placeholders and put back at the very end.
    const stash: string[] = [];

    // NUL delimits placeholders, so remove any that arrive in the input (this
    // prevents forged placeholders). Line endings are normalised to "\n".
    let html = markdown.replace(/\u0000/g, '').replace(/\r\n?/g, '\n');

    // Code first, so nothing inside it is interpreted as markdown or HTML.
    html = this.parseCodeBlocks(html, stash);
    html = this.parseInlineCode(html, stash);

    // Escape raw HTML before any tag is generated: everything that looks like
    // a tag after this point was produced by this parser.
    if (this.options.sanitize) {
      html = this.escapeRawHtml(html);
    }

    // Links/images before emphasis so underscores in URLs are left alone.
    html = this.parseLinksAndImages(html, stash);

    html = this.parseHeaders(html);
    // Rules before emphasis, otherwise "***" is consumed as emphasis markers.
    html = this.parseHorizontalRules(html);
    html = this.parseEmphasis(html);
    html = this.parseLists(html);
    html = this.parseBlockquotes(html);
    html = this.parseLineBreaks(html);

    return this.restore(html, stash).trim();
  }

  /** Store a finished HTML fragment and return the placeholder that stands in for it. */
  private keep(stash: string[], fragment: string, kind: 'B' | 'I'): string {
    stash.push(fragment);
    // "B" marks block-level fragments (must not be wrapped in <p>), "I" inline ones.
    return `\u0000${kind}${stash.length - 1}\u0000`;
  }

  /** Replace every placeholder with the fragment it stands for. */
  private restore(html: string, stash: string[]): string {
    let current = html;
    let previous: string;
    // A fragment can contain a placeholder created before it (never after),
    // so repeating until nothing changes always terminates.
    do {
      previous = current;
      current = current.replace(
        /\u0000[BI](\d+)\u0000/g,
        (_match: string, index: string) => stash[Number(index)] ?? ''
      );
    } while (current !== previous);
    return current;
  }

  /** Turn fenced code blocks into escaped `<pre><code>` fragments and stash them. */
  private parseCodeBlocks(html: string, stash: string[]): string {
    return html.replace(/```([\s\S]*?)```/g, (_match: string, code: string) => {
      const firstBreak = code.indexOf('\n');
      // Only a multi-line fence has an info string; a one-line fence is all code.
      const info = firstBreak === -1 ? '' : code.slice(0, firstBreak).trim();
      const body = firstBreak === -1 ? code : code.slice(firstBreak + 1);
      const lang = info.split(/\s+/)[0] ?? '';
      const classAttr = lang ? ` class="language-${this.escapeHtml(lang)}"` : '';
      // Drop surrounding blank lines but keep the first line's indentation.
      const content = body.replace(/^\n+/, '').replace(/\s+$/, '');

      const block = `<pre><code${classAttr}>${this.escapeHtml(content)}</code></pre>`;
      // Put the placeholder on its own line so it is treated as a block.
      return `\n${this.keep(stash, block, 'B')}\n`;
    });
  }

  /** Turn `` `code` `` spans into escaped `<code>` fragments and stash them. */
  private parseInlineCode(html: string, stash: string[]): string {
    return html.replace(/`([^`]+)`/g, (_match: string, code: string) =>
      this.keep(stash, `<code>${this.escapeHtml(code)}</code>`, 'I')
    );
  }

  /** Escape `&`, `<` and `>` so raw HTML in the source is rendered as text. */
  private escapeRawHtml(html: string): string {
    return html
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;');
  }

  /** Convert `# ` … `###### ` lines into `<h1>` … `<h6>`. */
  private parseHeaders(html: string): string {
    return html.replace(/^(#{1,6})[ \t]+(.+)$/gm, (_match: string, hashes: string, text: string) => {
      const level = hashes.length;
      return `<h${level}>${text.trim()}</h${level}>`;
    });
  }

  /** Convert `*`/`_` emphasis markers into `<strong>` / `<em>`. */
  private parseEmphasis(html: string): string {
    return this.rewriteEmphasis(html, (tag, inner) => `<${tag}>${inner}</${tag}>`);
  }

  /**
   * Shared emphasis matcher used by both `parse` and `extractText`.
   *
   * Emphasised text must start and end with a non-space character, and `_`
   * markers must not sit inside a word, so `2 * 3 * 4` and `snake_case_name`
   * are left untouched.
   */
  private rewriteEmphasis(
    text: string,
    render: (tag: 'strong' | 'em', inner: string) => string
  ): string {
    return text
      .replace(/\*\*\*(\S(?:.*?\S)??)\*\*\*/g, (_m: string, inner: string) =>
        render('strong', render('em', inner)))
      .replace(/\*\*(\S(?:.*?\S)??)\*\*/g, (_m: string, inner: string) =>
        render('strong', inner))
      .replace(/(^|\W)__(\S(?:.*?\S)??)__(?!\w)/gm, (_m: string, lead: string, inner: string) =>
        lead + render('strong', inner))
      .replace(/\*(\S(?:.*?\S)??)\*/g, (_m: string, inner: string) =>
        render('em', inner))
      .replace(/(^|\W)_(\S(?:.*?\S)??)_(?!\w)/gm, (_m: string, lead: string, inner: string) =>
        lead + render('em', inner));
  }

  /** Convert `![alt](src)` and `[text](url)`; the generated tags are stashed. */
  private parseLinksAndImages(html: string, stash: string[]): string {
    const linkTarget = this.options.linkTarget
      ? ` target="${this.escapeHtml(this.options.linkTarget)}" rel="noopener noreferrer"`
      : '';

    // Images: ![alt](src)
    let result = html.replace(/!\[([^\]]*)\]\(([^)]*)\)/g, (_m: string, alt: string, src: string) =>
      this.keep(
        stash,
        `<img src="${this.escapeAttribute(this.safeUrl(src, true))}" alt="${this.escapeAttribute(alt)}" style="max-width: 100%; height: auto; border-radius: 0.25rem; margin: 0.5rem 0;" />`,
        'I'
      )
    );

    // Links: [text](url). Only the opening tag is stashed so the link text
    // still receives emphasis formatting.
    result = result.replace(/\[([^\]]*)\]\(([^)]*)\)/g, (_m: string, text: string, url: string) => {
      const open = `<a href="${this.escapeAttribute(this.safeUrl(url, false))}"${linkTarget}>`;
      return `${this.keep(stash, open, 'I')}${text}</a>`;
    });

    return result;
  }

  /**
   * Return `url` if it is acceptable for an `href`/`src`, otherwise `'#'`.
   * Without `sanitize` the URL is returned as written (trimmed).
   */
  private safeUrl(url: string, isImage: boolean): string {
    const trimmed = url.trim();
    if (!this.options.sanitize) {
      return trimmed;
    }

    // Browsers ignore whitespace/control characters when reading a scheme, so
    // strip them before checking ("java\tscript:" must not slip through).
    const compact = trimmed.replace(/[\u0000-\u0020\u007f-\u009f]/g, '');
    const scheme = /^([a-z][a-z0-9+.-]*):/i.exec(compact);
    if (!scheme) {
      return trimmed; // relative URL or fragment
    }

    const name = (scheme[1] ?? '').toLowerCase();
    if (name === 'http' || name === 'https' || name === 'mailto' || name === 'tel') {
      return trimmed;
    }
    if (isImage && /^data:image\/(?:png|gif|jpe?g|webp);base64,/i.test(compact)) {
      return trimmed;
    }
    return '#';
  }

  /**
   * Make markdown-derived text safe inside a double-quoted attribute.
   * Placeholders are removed because a stashed tag must not end up inside
   * an attribute value.
   */
  private escapeAttribute(value: string): string {
    return value.replace(/\u0000[BI]\d+\u0000/g, '').replace(/"/g, '&quot;');
  }

  /**
   * Group consecutive list lines into `<ul>` (markers `-`, `*`, `+`) or
   * `<ol>` (markers `1.`, `2.` …). Each list is emitted on a single line.
   */
  private parseLists(html: string): string {
    const bulletLine = /^\s*[-*+]\s+(.+)$/;
    const numberedLine = /^\s*\d+\.\s+(.+)$/;

    const output: string[] = [];
    let items: string[] = [];
    let openTag = '';

    const closeList = (): void => {
      if (items.length > 0) {
        output.push(`<${openTag}>${items.join('')}</${openTag}>`);
        items = [];
      }
      openTag = '';
    };

    for (const line of html.split('\n')) {
      const bullet = bulletLine.exec(line);
      const item = bullet ?? numberedLine.exec(line);

      if (!item) {
        closeList();
        output.push(line);
        continue;
      }

      const tag = bullet ? 'ul' : 'ol';
      if (openTag !== tag) {
        // A change of marker type starts a new list.
        closeList();
        openTag = tag;
      }
      items.push(`<li>${item[1] ?? ''}</li>`);
    }
    closeList();

    return output.join('\n');
  }

  /** Convert `> text` lines into `<blockquote>`, merging directly adjacent lines. */
  private parseBlockquotes(html: string): string {
    // With sanitize on, the leading ">" has already been escaped.
    const marker = this.options.sanitize ? '&gt;' : '>';
    const quoteLine = new RegExp(`^${marker}[ \\t]*(.+)$`, 'gm');

    return html
      .replace(quoteLine, '<blockquote>$1</blockquote>')
      // Only lines that directly follow each other belong to the same quote.
      .replace(/<\/blockquote>\n<blockquote>/g, ' ');
  }

  /** Convert lines made only of three or more `-` or `*` into `<hr>`. */
  private parseHorizontalRules(html: string): string {
    return html.replace(/^(?:-{3,}|\*{3,})[ \t]*$/gm, '<hr>');
  }

  /**
   * Wrap runs of plain text lines in `<p>` (joined with `<br>`), leaving block
   * elements as they are. Skipped entirely when the `breaks` option is falsy.
   */
  private parseLineBreaks(html: string): string {
    if (!this.options.breaks) {
      return html;
    }

    const output: string[] = [];

    // Blank lines separate paragraphs.
    for (const chunk of html.split(/\n\s*\n/)) {
      let textLines: string[] = [];

      const closeParagraph = (): void => {
        if (textLines.length > 0) {
          output.push(`<p>${textLines.join('<br>')}</p>`);
          textLines = [];
        }
      };

      for (const rawLine of chunk.split('\n')) {
        const line = rawLine.trim();
        if (!line) {
          continue;
        }
        if (this.isBlockElement(line)) {
          closeParagraph();
          output.push(line);
        } else {
          textLines.push(line);
        }
      }
      closeParagraph();
    }

    return output.join('\n\n');
  }

  /** True when the line starts with a block-level tag or a stashed block placeholder. */
  private isBlockElement(html: string): boolean {
    return /^(?:\u0000B\d+\u0000|<\/?(?:h[1-6]|p|div|ul|ol|li|blockquote|pre|hr)\b)/i.test(html);
  }

  /** Escape text for use in HTML content or a quoted attribute (no DOM required). */
  private escapeHtml(text: string): string {
    return text
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;');
  }

  /**
   * Extract plain text from markdown (for word/character counting).
   *
   * Code blocks, inline code and images are removed completely; links keep
   * their text; all other markers are stripped and whitespace is collapsed.
   *
   * @param markdown - Markdown source text.
   * @returns The remaining text on a single line.
   */
  extractText(markdown: string): string {
    if (!markdown) {
      return '';
    }

    let text = markdown;

    // Remove code blocks and inline code
    text = text.replace(/```[\s\S]*?```/g, '');
    text = text.replace(/`[^`]*`/g, '');

    // Remove images; keep only the text of links
    text = text.replace(/!\[[^\]]*\]\([^)]*\)/g, '');
    text = text.replace(/\[([^\]]*)\]\([^)]*\)/g, '$1');

    // Line-level markers go before emphasis, otherwise "* item" bullets and
    // "***" rules are misread as emphasis markers.
    text = text.replace(/^#{1,6}\s+/gm, '');
    text = text.replace(/^>\s+/gm, '');
    text = text.replace(/^(?:-{3,}|\*{3,})[ \t]*$/gm, '');
    text = text.replace(/^[\s]*[-*+]\s+/gm, '');
    text = text.replace(/^[\s]*\d+\.\s+/gm, '');

    // Remove emphasis markers, keeping the emphasised text
    text = this.rewriteEmphasis(text, (_tag, inner) => inner);

    // Clean up whitespace
    return text.replace(/\s+/g, ' ').trim();
  }

  /**
   * Count words in markdown text.
   *
   * @returns Number of whitespace-separated words in `extractText(markdown)`.
   */
  countWords(markdown: string): number {
    const plainText = this.extractText(markdown);
    if (!plainText) return 0;
    return plainText.split(/\s+/).length;
  }

  /**
   * Count characters in markdown text.
   *
   * @returns Length of `extractText(markdown)`.
   */
  countCharacters(markdown: string): number {
    return this.extractText(markdown).length;
  }

  /**
   * Generate table of contents from headers.
   *
   * Lines inside fenced code blocks are ignored. The anchor is the lower-cased
   * header text reduced to `a-z`, `0-9` and single hyphens; it can be empty
   * for headers without such characters and is not de-duplicated.
   *
   * @param markdown - Markdown source text.
   * @returns One entry per header, in document order.
   */
  generateTOC(markdown: string): Array<{level: number, text: string, anchor: string}> {
    const headers: Array<{level: number, text: string, anchor: string}> = [];
    if (!markdown) {
      return headers;
    }

    // "# comment" lines inside fenced code are not headers.
    const withoutCode = markdown.replace(/```[\s\S]*?```/g, '');

    // Accept CRLF as well: a trailing "\r" would stop the pattern matching.
    for (const line of withoutCode.split(/\r?\n/)) {
      const headerMatch = /^(#{1,6})\s+(.+)$/.exec(line);
      if (!headerMatch) {
        continue;
      }

      const level = (headerMatch[1] ?? '').length;
      const text = (headerMatch[2] ?? '').trim();
      const anchor = text.toLowerCase()
        .replace(/[^a-z0-9\s-]/g, '')
        .replace(/\s+/g, '-')
        .replace(/-+/g, '-')
        .replace(/^-|-$/g, '');

      headers.push({ level, text, anchor });
    }

    return headers;
  }
}

// Convenience functions for global use (each call creates a fresh parser)
/**
 * Convert markdown to HTML with a throwaway parser instance.
 *
 * @param markdown - Markdown source text.
 * @param options - Optional settings forwarded to the `SimpleMarkdownParser` constructor.
 * @returns The result of `SimpleMarkdownParser.parse` for the input.
 */
export function parseMarkdown(markdown: string, options?: MarkdownParseOptions): string {
  return new SimpleMarkdownParser(options).parse(markdown);
}
|
|
|
|
/**
 * Strip markdown syntax using a default-configured parser.
 *
 * @param markdown - Markdown source text.
 * @returns The result of `SimpleMarkdownParser.extractText` for the input.
 */
export function extractTextFromMarkdown(markdown: string): string {
  return new SimpleMarkdownParser().extractText(markdown);
}
|
|
|
|
/**
 * Count the words of a markdown document using a default-configured parser.
 *
 * @param markdown - Markdown source text.
 * @returns The result of `SimpleMarkdownParser.countWords` for the input.
 */
export function countWordsInMarkdown(markdown: string): number {
  return new SimpleMarkdownParser().countWords(markdown);
}
|
|
|
|
/**
 * Count the characters of a markdown document using a default-configured parser.
 *
 * @param markdown - Markdown source text.
 * @returns The result of `SimpleMarkdownParser.countCharacters` for the input.
 */
export function countCharactersInMarkdown(markdown: string): number {
  return new SimpleMarkdownParser().countCharacters(markdown);
}
|
|
|
|
/**
 * Build a table of contents using a default-configured parser.
 *
 * @param markdown - Markdown source text.
 * @returns The result of `SimpleMarkdownParser.generateTOC` for the input.
 */
export function generateMarkdownTOC(markdown: string): Array<{level: number, text: string, anchor: string}> {
  return new SimpleMarkdownParser().generateTOC(markdown);
}