#!/usr/bin/env node
// find-duplicate-functions.mjs
//
// Scans a directory tree for TypeScript / Astro sources, extracts every
// function body, and reports groups of functions whose bodies are identical
// (mode "exact": whitespace-normalized text) or structurally identical
// (mode "struct": same AST shape with identifiers and literals abstracted).
//
// Usage:
//   node find-duplicate-functions.mjs [rootDir] [--mode exact|struct] [--min-lines N] [--json]
// Example:
//   node find-duplicate-functions.mjs . --mode struct --min-lines 3

import crypto from "crypto";
import fs from "fs";
import path from "path";
import * as url from "url";
import ts from "typescript";

const __dirname = path.dirname(url.fileURLToPath(import.meta.url));

/** -------- CLI OPTIONS -------- */
const args = process.argv.slice(2);
let rootDir = ".";
let mode = "struct"; // "exact" | "struct"
let minLines = 3;
let outputJson = false;

for (let i = 0; i < args.length; i++) {
  const a = args[i];
  if (!a.startsWith("--") && rootDir === ".") {
    // First positional argument is the root directory.
    rootDir = a;
  } else if (a === "--mode") {
    mode = (args[++i] || "struct").toLowerCase();
    if (!["exact", "struct"].includes(mode)) {
      console.error("Invalid --mode. Use 'exact' or 'struct'.");
      process.exit(1);
    }
  } else if (a === "--min-lines") {
    minLines = Number.parseInt(args[++i] || "3", 10);
    // A NaN threshold would make every `length < minLines` comparison false
    // and silently disable the filter, so reject it up front.
    if (Number.isNaN(minLines)) {
      console.error("Invalid --min-lines. Use an integer.");
      process.exit(1);
    }
  } else if (a === "--json") {
    outputJson = true;
  }
}

/** -------- FILE DISCOVERY -------- */
const DEFAULT_IGNORES = new Set([
  "node_modules",
  ".git",
  ".next",
  ".vercel",
  "dist",
  "build",
  ".astro", // Astro's generated cache dir
]);

const VALID_EXTS = new Set([".ts", ".tsx", ".astro", ".mts", ".cts"]);

/**
 * Recursively collect the files under `dir` whose extension is in VALID_EXTS,
 * skipping any directory whose name is in DEFAULT_IGNORES.
 *
 * @param {string} dir - directory to scan
 * @returns {string[]} paths of matching files (each is `dir` joined with the relative location)
 */
function walk(dir) {
  /** @type {string[]} */
  const out = [];
  const entries = fs.readdirSync(dir, { withFileTypes: true });
  for (const e of entries) {
    const p = path.join(dir, e.name);
    if (e.isDirectory()) {
      if (DEFAULT_IGNORES.has(e.name)) continue;
      // Push one at a time: spreading a very large array into push() can
      // exceed the engine's argument-count limit.
      for (const nested of walk(p)) out.push(nested);
    } else if (e.isFile() && VALID_EXTS.has(path.extname(e.name))) {
      out.push(p);
    }
  }
  return out;
}

/** -------- ASTRO CODE EXTRACTION --------
 * Extract TS/JS code from:
 *  - frontmatter: --- ... ---
 *  - <script> ... </script> blocks
 *
 * @param {string} source - full text of an .astro file
 * @returns {{code:string, offset:number}[]} extracted code blocks; `offset`
 *   is the 1-based file line on which line 1 of `code` sits, so a line N
 *   inside the block maps to file line `N + offset - 1`.
 */
function extractCodeFromAstro(source) {
  /** @type {{code:string, offset:number}[]} */
  const blocks = [];

  // Frontmatter must be at the very top of the file; take the FIRST pair of
  // --- ... --- fences.
  if (source.startsWith("---")) {
    const end = source.indexOf("\n---", 3);
    if (end !== -1) {
      // The slice begins right after the opening fence, i.e. with the newline
      // that ends file line 1, and includes the newline before the closing
      // fence. Line N of the slice is therefore file line N -> offset 1.
      blocks.push({ code: source.slice(3, end + 1), offset: 1 });
    }
  }

  // <script ...> ... </script> blocks anywhere in the file.
  const scriptRe = /<script\b[^>]*>([\s\S]*?)<\/script>/gi;
  for (const m of source.matchAll(scriptRe)) {
    const code = m[1] || "";
    // `[^>]*` guarantees the first '>' in the match closes the opening tag,
    // so the code starts on the character after it. Measuring from here
    // (not from '<') keeps line numbers right for multi-line opening tags.
    const codeStart = m.index + m[0].indexOf(">") + 1;
    blocks.push({ code, offset: indexToLine(source, codeStart) });
  }
  return blocks;
}

/** -------- UTIL: index -> 1-based line -------- */
/**
 * @param {string} text
 * @param {number} idx - character index into `text`
 * @returns {number} 1-based line number of the character at `idx`
 *   (1 + number of "\n" characters strictly before `idx`)
 */
function indexToLine(text, idx) {
  let line = 1;
  for (let i = 0; i < idx && i < text.length; i++) {
    if (text.charCodeAt(i) === 10) line++;
  }
  return line;
}

/** -------- AST HELPERS -------- */
/**
 * Parse `code` into a TypeScript SourceFile with parent pointers set.
 * Paths ending in ".tsx" are parsed as TSX, everything else as TS.
 *
 * @param {string} virtualPath - file name given to the parser (need not exist on disk)
 * @param {string} code
 */
function createSourceFile(virtualPath, code) {
  return ts.createSourceFile(
    virtualPath,
    code,
    ts.ScriptTarget.Latest,
    /*setParentNodes*/ true,
    virtualPath.endsWith(".tsx") ? ts.ScriptKind.TSX : ts.ScriptKind.TS
  );
}

/**
 * Normalize an AST subtree to a structural signature string: a pre-order
 * list of node-kind names joined by "|", in which identifiers, string-like
 * literals, numbers, booleans and null/undefined are replaced by fixed
 * placeholders, and property/method members collapse to "Prop".
 *
 * NOTE(review): the signature records kinds in visiting order only, with no
 * nesting markers, so two differently shaped trees can in principle produce
 * the same string (e.g. `f(a, b)(c)` vs `f(a)(b, c)`).
 *
 * @param {ts.Node} node
 * @returns {string}
 */
function structuralSignature(node) {
  /** @type {string[]} */
  const parts = [];

  const visit = (n) => {
    // Comments/whitespace are trivia and never appear as AST nodes.
    const kindName = ts.SyntaxKind[n.kind] || `K${n.kind}`;

    switch (n.kind) {
      case ts.SyntaxKind.Identifier:
        parts.push("Id");
        return;
      case ts.SyntaxKind.PrivateIdentifier:
        parts.push("PrivId");
        return;
      case ts.SyntaxKind.StringLiteral:
      case ts.SyntaxKind.NoSubstitutionTemplateLiteral:
      case ts.SyntaxKind.TemplateHead:
      case ts.SyntaxKind.TemplateMiddle:
      case ts.SyntaxKind.TemplateTail:
        parts.push("Str");
        return;
      case ts.SyntaxKind.NumericLiteral:
        parts.push("Num");
        return;
      case ts.SyntaxKind.TrueKeyword:
      case ts.SyntaxKind.FalseKeyword:
        parts.push("Bool");
        return;
      case ts.SyntaxKind.NullKeyword:
      case ts.SyntaxKind.UndefinedKeyword:
        parts.push("Nil");
        return;
      case ts.SyntaxKind.PropertyAssignment:
      case ts.SyntaxKind.ShorthandPropertyAssignment:
      case ts.SyntaxKind.MethodDeclaration:
      case ts.SyntaxKind.MethodSignature:
        // Members share one tag but, unlike the leaves above, still have
        // their children visited.
        parts.push("Prop");
        break;
      default:
        parts.push(kindName);
    }

    n.forEachChild(visit);
  };

  visit(node);
  return parts.join("|");
}

/**
 * Collect every function body in `sf`: function declarations, function
 * expressions, arrow functions and method declarations (nested ones
 * included). Bodies with fewer than `minLines` non-empty lines are dropped.
 *
 * @param {ts.SourceFile} sf
 * @param {string} filePath - currently unused; kept for the existing call signature
 * @returns {Array<{
 *   name: string, bodyText: string, structKey: string,
 *   start: number, end: number, startLine: number, endLine: number
 * }>} `startLine`/`endLine` are 1-based lines within `sf`
 */
function getFunctionInfo(sf, filePath) {
  /** @type {Array<{
   *  name: string,
   *  bodyText: string,
   *  structKey: string,
   *  start: number,
   *  end: number,
   *  startLine: number,
   *  endLine: number
   * }>} */
  const out = [];

  const addFunc = (nameNode, bodyNode) => {
    if (!bodyNode) return;

    const bodyText = bodyNode.getText(sf).trim();

    // min-lines filter (blank lines do not count)
    const lines = bodyText.split(/\r?\n/).filter(Boolean);
    if (lines.length < minLines) return;

    const start = bodyNode.getStart(sf);
    const end = bodyNode.getEnd();
    const { line: startLine } = sf.getLineAndCharacterOfPosition(start);
    const { line: endLine } = sf.getLineAndCharacterOfPosition(end);

    // `.text` is the unescaped name; `.escapedText` prepends an extra "_" to
    // names that begin with "__" and would misreport them.
    const name =
      nameNode && ts.isIdentifier(nameNode) ? nameNode.text : "(anonymous)";

    out.push({
      name,
      bodyText,
      structKey: structuralSignature(bodyNode),
      start,
      end,
      // getLineAndCharacterOfPosition is 0-based
      startLine: startLine + 1,
      endLine: endLine + 1,
    });
  };

  const visit = (node) => {
    if (ts.isFunctionDeclaration(node) && node.body) {
      addFunc(node.name ?? null, node.body);
    } else if (ts.isFunctionExpression(node) || ts.isArrowFunction(node)) {
      // Derive a name from the assignment target when there is one:
      //   const foo = () => {}      -> variable name
      //   { foo: function () {} }   -> property name
      //   function foo() {} (expr)  -> the expression's own name
      let name = null;
      if (
        node.parent &&
        ts.isVariableDeclaration(node.parent) &&
        node.parent.name
      ) {
        name = node.parent.name;
      } else if (
        node.parent &&
        ts.isPropertyAssignment(node.parent) &&
        ts.isIdentifier(node.parent.name)
      ) {
        name = node.parent.name;
      } else if (node.name) {
        name = node.name;
      }
      if (node.body) addFunc(name, node.body);
    } else if (ts.isMethodDeclaration(node) && node.body) {
      addFunc(node.name, node.body);
    }
    // Always descend so nested functions are collected too.
    node.forEachChild(visit);
  };

  visit(sf);
  return out;
}

/** -------- MAIN SCAN -------- */
let files;
try {
  files = walk(path.resolve(process.cwd(), rootDir));
} catch (e) {
  // A missing or unreadable root directory gets a readable message instead
  // of a raw stack trace.
  console.error(`Cannot scan ${rootDir}: ${e.message}`);
  process.exit(1);
}

/** Maps from hash -> occurrences */
const groups = new Map();

/** Hex SHA-1 of a string. */
const sha1 = (text) => crypto.createHash("sha1").update(text).digest("hex");

/** Helper for exact hash: whitespace runs collapse to one space before hashing. */
const exactHash = (text) => sha1(text.replace(/\s+/g, " ").trim());

for (const file of files) {
  try {
    const ext = path.extname(file).toLowerCase();
    const raw = fs.readFileSync(file, "utf8");
    const relPath = path.relative(process.cwd(), file);

    /** @type {Array<{virtualPath:string, code:string, lineOffset:number}>} */
    const codeUnits = [];

    if (ext === ".astro") {
      extractCodeFromAstro(raw).forEach((b, i) => {
        codeUnits.push({
          virtualPath: `${file}#astro${i + 1}.ts`,
          code: b.code,
          lineOffset: b.offset || 1,
        });
      });
    } else {
      codeUnits.push({ virtualPath: file, code: raw, lineOffset: 1 });
    }

    for (const { virtualPath, code, lineOffset } of codeUnits) {
      const sf = createSourceFile(virtualPath, code);

      for (const f of getFunctionInfo(sf, file)) {
        const key = mode === "exact" ? exactHash(f.bodyText) : sha1(f.structKey);

        // lineOffset is 1 for whole-file units, so one formula covers both
        // plain files and blocks extracted from .astro files.
        const firstLine = f.startLine + lineOffset - 1;
        const lastLine = f.endLine + lineOffset - 1;
        const lineCount = f.endLine - f.startLine + 1;

        const item = {
          file,
          where: `${relPath}:${firstLine}-${lastLine}`,
          name: f.name,
          lines: lineCount,
          preview:
            f.bodyText.split(/\r?\n/).slice(0, 5).join("\n") +
            (lineCount > 5 ? "\n..." : ""),
        };

        if (!groups.has(key)) groups.set(key, []);
        groups.get(key).push(item);
      }
    }
  } catch (e) {
    // Best effort: one unreadable/unparsable file must not abort the scan.
    console.warn(`⚠️ Skipping ${file}: ${e.message}`);
  }
}

/** -------- REPORT -------- */
const dupes = [...groups.entries()]
  .map(([key, arr]) => ({ key, items: arr }))
  .filter((g) => g.items.length > 1)
  .sort((a, b) => b.items.length - a.items.length);

// No process.exit() on the success paths: exiting right after a large
// console.log can cut off output that is still being flushed to a pipe.
// The script simply runs to its end and exits with code 0.
if (outputJson) {
  console.log(JSON.stringify({ mode, minLines, groups: dupes }, null, 2));
} else if (dupes.length === 0) {
  console.log(`✅ No duplicate functions found (mode=${mode}, min-lines=${minLines}).`);
} else {
  console.log(`\nFound ${dupes.length} duplicate group(s) (mode=${mode}, min-lines=${minLines}):\n`);

  dupes.forEach((g, i) => {
    console.log(`== Group ${i + 1} (${g.items.length} matches) ==`);
    const example = g.items[0];
    console.log(` Sample (${example.lines} lines) from ${example.where}${example.name ? ` [${example.name}]` : ""}`);
    console.log(" ---");
    console.log(indent(example.preview, " "));
    console.log(" ---");
    g.items.forEach((it) => {
      console.log(` • ${it.where}${it.name ? ` [${it.name}]` : ""} (${it.lines} lines)`);
    });
    console.log();
  });
}

/**
 * Prefix every line of `s` with `pre`.
 *
 * @param {string} s
 * @param {string} pre
 * @returns {string}
 */
function indent(s, pre) {
  return s
    .split("\n")
    .map((l) => pre + l)
    .join("\n");
}