#!/usr/bin/env node
// find-duplicate-functions.mjs
//
// Usage:
//   node find-duplicate-functions.mjs [rootDir] [--mode exact|struct] [--min-lines N] [--json]
//
// Example:
//   node find-duplicate-functions.mjs . --mode struct --min-lines 3
|
||
|
||
import fs from "fs";
|
||
import path from "path";
|
||
import * as url from "url";
|
||
import ts from "typescript";
|
||
|
||
const __dirname = path.dirname(url.fileURLToPath(import.meta.url));

/** -------- CLI OPTIONS -------- */

/**
 * Parse the command-line arguments into scan options.
 *
 * Recognised arguments:
 *   - first positional argument: root directory to scan (default ".")
 *   - `--mode exact|struct`: comparison mode, case-insensitive (default "struct")
 *   - `--min-lines N`: minimum number of body lines (default 3)
 *   - `--json`: emit the report as JSON
 * Anything else is ignored.
 *
 * @param {string[]} argv - Arguments without the node binary and script path.
 * @returns {{rootDir: string, mode: string, minLines: number, outputJson: boolean}}
 * @throws {Error} When `--mode` or `--min-lines` has an invalid value.
 */
function parseCliArgs(argv) {
  const options = { rootDir: ".", mode: "struct", minLines: 3, outputJson: false };

  for (let i = 0; i < argv.length; i++) {
    const arg = argv[i];

    if (!arg.startsWith("--") && options.rootDir === ".") {
      options.rootDir = arg;
    } else if (arg === "--mode") {
      options.mode = (argv[++i] || "struct").toLowerCase();
      if (!["exact", "struct"].includes(options.mode)) {
        throw new Error("Invalid --mode. Use 'exact' or 'struct'.");
      }
    } else if (arg === "--min-lines") {
      const parsed = Number.parseInt(argv[++i] || "3", 10);
      // A NaN threshold would make every `length < minLines` comparison false
      // and silently disable the filter, so reject it up front.
      if (!Number.isInteger(parsed) || parsed < 0) {
        throw new Error("Invalid --min-lines. Use a non-negative integer.");
      }
      options.minLines = parsed;
    } else if (arg === "--json") {
      options.outputJson = true;
    }
  }

  return options;
}

const args = process.argv.slice(2);

let cliOptions;
try {
  cliOptions = parseCliArgs(args);
} catch (err) {
  console.error(err.message);
  process.exit(1);
}

const { rootDir, mode, minLines, outputJson } = cliOptions;
|
||
|
||
/** -------- FILE DISCOVERY -------- */

/** Directory names that the file walk never descends into. */
const DEFAULT_IGNORES = new Set([
  "node_modules",
  ".git",
  ".next",
  ".vercel",
  "dist",
  "build",
  ".astro", // Astro's generated cache dir
]);

/** File extensions (with leading dot) of the files that get scanned. */
const VALID_EXTS = new Set([".ts", ".tsx", ".astro", ".mts", ".cts"]);
|
||
|
||
/**
 * Recursively collect the paths of all scannable files below `dir`.
 *
 * Directories whose name is in DEFAULT_IGNORES are skipped entirely. A file is
 * kept when its extension, compared case-insensitively, is in VALID_EXTS.
 * Results are in depth-first order, following the order `readdirSync` returns.
 *
 * @param {string} dir - Directory to start from.
 * @returns {string[]} Paths built by joining `dir` with each entry name.
 * @throws Whatever `fs.readdirSync` throws for an unreadable directory.
 */
function walk(dir) {
  /** @type {string[]} */
  const out = [];

  // One shared accumulator instead of `out.push(...walk(p))`: spreading a very
  // large sub-result into push() can exceed the engine's argument limit.
  const collect = (current) => {
    for (const entry of fs.readdirSync(current, { withFileTypes: true })) {
      const entryPath = path.join(current, entry.name);
      if (entry.isDirectory()) {
        if (!DEFAULT_IGNORES.has(entry.name)) collect(entryPath);
      } else if (entry.isFile() && VALID_EXTS.has(path.extname(entry.name).toLowerCase())) {
        // Lowercased so "Foo.TS" is found, matching the lowercase extension
        // check the main scan loop applies.
        out.push(entryPath);
      }
    }
  };

  collect(dir);
  return out;
}
|
||
|
||
/** -------- ASTRO CODE EXTRACTION --------
 * Extract TS/JS code from:
 *  - frontmatter: --- ... ---
 *  - <script ...> ... </script>
 *
 * Each returned block carries `offset`: the 1-based line of `source` on which
 * the first character of `code` sits. A function on line L of `code` is
 * therefore on line `L + offset - 1` of the .astro file.
 *
 * @param {string} source - Full text of an .astro file.
 * @returns {{code: string, offset: number}[]} Frontmatter first (if present),
 *   then every script block in document order.
 */
function extractCodeFromAstro(source) {
  /** @type {{code:string, offset:number}[]} */
  const blocks = [];

  // Frontmatter must open the file; take the first `---` ... `---` pair.
  if (source.startsWith("---")) {
    const closing = source.indexOf("\n---", 3);
    if (closing !== -1) {
      // The slice starts right after the opening fence, i.e. still on line 1,
      // and keeps the newline that precedes the closing fence.
      blocks.push({ code: source.slice(3, closing + 1), offset: 1 });
    }
  }

  // <script ...> ... </script>
  const scriptRe = /<script\b[^>]*>([\s\S]*?)<\/script>/gi;
  let m;
  while ((m = scriptRe.exec(source)) !== null) {
    const code = m[1] || "";
    // `[^>]*` cannot contain '>', so the first '>' of the match closes the
    // opening tag. Measuring from there keeps line numbers right when the
    // opening tag spans several lines.
    const codeStart = m.index + m[0].indexOf(">") + 1;
    blocks.push({ code, offset: indexToLine(source, codeStart) });
  }

  return blocks;
}
|
||
|
||
/** -------- UTIL: index -> 1-based line -------- */

/**
 * Return the 1-based line number of character index `idx` in `text`.
 *
 * Counts the "\n" characters located strictly before `idx`; indices past the
 * end of the text are treated as the end, and indices <= 0 yield line 1.
 *
 * @param {string} text
 * @param {number} idx - 0-based character index.
 * @returns {number} 1-based line number.
 */
function indexToLine(text, idx) {
  const limit = Math.min(idx, text.length);
  let line = 1;
  let pos = text.indexOf("\n");
  while (pos !== -1 && pos < limit) {
    line++;
    pos = text.indexOf("\n", pos + 1);
  }
  return line;
}
|
||
|
||
/** -------- AST HELPERS -------- */

/**
 * Parse `code` into a TypeScript source file.
 *
 * Parent pointers are enabled because getFunctionInfo reads `node.parent` to
 * name function expressions. Paths ending in ".tsx" (any letter case) are
 * parsed as TSX, everything else as plain TS.
 *
 * @param {string} virtualPath - File name given to the parser; need not exist on disk.
 * @param {string} code - Source text to parse.
 * @returns The value returned by `ts.createSourceFile`.
 */
function createSourceFile(virtualPath, code) {
  const scriptKind = virtualPath.toLowerCase().endsWith(".tsx")
    ? ts.ScriptKind.TSX
    : ts.ScriptKind.TS;

  return ts.createSourceFile(
    virtualPath,
    code,
    ts.ScriptTarget.Latest,
    /* setParentNodes */ true,
    scriptKind
  );
}
|
||
|
||
// Normalize AST to a structural signature string
/**
 * Build a string that describes the shape of the AST under `node` while
 * ignoring concrete names and literal values.
 *
 * - Identifiers become "Id", private identifiers "PrivId".
 * - String/template pieces become "Str", numbers "Num", true/false "Bool",
 *   null/undefined keywords "Nil".
 * - Property-like members (assignments, shorthand, methods, method
 *   signatures) are labelled "Prop".
 * - Prefix/postfix unary expressions include their operator, which is a plain
 *   property of the node and is not reached by `forEachChild`; without it
 *   `-x`, `!x` and `++x` would produce the same signature.
 * - Every other node contributes its SyntaxKind name.
 * - The children of a node are wrapped in "(" ... ")" so that trees with the
 *   same pre-order kinds but different nesting (`f(g(a), b)` vs `f(g(a, b))`)
 *   do not collide.
 *
 * @param node - AST node exposing `kind` and `forEachChild`.
 * @returns {string} Labels joined with "|".
 */
function structuralSignature(node) {
  /** @type {string[]} */
  const parts = [];

  const kindLabel = (kind) => ts.SyntaxKind[kind] || `K${kind}`;

  const visit = (n) => {
    // Comments and whitespace are trivia, not nodes, so they never show up here.
    switch (n.kind) {
      case ts.SyntaxKind.Identifier:
        parts.push("Id");
        return;
      case ts.SyntaxKind.PrivateIdentifier:
        parts.push("PrivId");
        return;
      case ts.SyntaxKind.StringLiteral:
      case ts.SyntaxKind.NoSubstitutionTemplateLiteral:
      case ts.SyntaxKind.TemplateHead:
      case ts.SyntaxKind.TemplateMiddle:
      case ts.SyntaxKind.TemplateTail:
        parts.push("Str");
        return;
      case ts.SyntaxKind.NumericLiteral:
        parts.push("Num");
        return;
      case ts.SyntaxKind.TrueKeyword:
      case ts.SyntaxKind.FalseKeyword:
        parts.push("Bool");
        return;
      case ts.SyntaxKind.NullKeyword:
      case ts.SyntaxKind.UndefinedKeyword:
        parts.push("Nil");
        return;
      case ts.SyntaxKind.PropertyAssignment:
      case ts.SyntaxKind.ShorthandPropertyAssignment:
      case ts.SyntaxKind.MethodDeclaration:
      case ts.SyntaxKind.MethodSignature:
        parts.push("Prop");
        break;
      case ts.SyntaxKind.PrefixUnaryExpression:
      case ts.SyntaxKind.PostfixUnaryExpression:
        parts.push(`${kindLabel(n.kind)}:${kindLabel(n.operator)}`);
        break;
      default:
        parts.push(kindLabel(n.kind));
    }

    // Emit the group markers only when the node really has children.
    const openAt = parts.length;
    parts.push("(");
    n.forEachChild(visit);
    if (parts.length === openAt + 1) {
      parts.pop();
    } else {
      parts.push(")");
    }
  };

  visit(node);
  return parts.join("|");
}
|
||
|
||
/**
 * Collect every function-like body found in a parsed source file.
 *
 * Visits function declarations, function expressions, arrow functions and
 * method declarations that have a body. A body is skipped when it has fewer
 * non-blank lines than the module-level `minLines` threshold.
 *
 * Naming: the declaration's own identifier is used when present; function
 * expressions and arrows take the name of the variable or identifier-keyed
 * property they are assigned to; everything else is "(anonymous)".
 *
 * @param sf - Source file whose nodes have parent pointers set.
 * @param {string} filePath - Currently unused; kept so existing callers keep working.
 * @returns {Array<{
 *   name: string,
 *   bodyText: string,
 *   structKey: string,
 *   start: number,
 *   end: number,
 *   startLine: number,
 *   endLine: number
 * }>} One record per function body; line numbers are 1-based within `sf`.
 */
function getFunctionInfo(sf, filePath) {
  /** @type {Array<{
    name: string,
    bodyText: string,
    structKey: string,
    start: number,
    end: number,
    startLine: number,
    endLine: number
  }>} */
  const out = [];

  const addFunc = (nameNode, bodyNode) => {
    if (!bodyNode) return;

    const bodyText = bodyNode.getText(sf).trim();

    // min-lines filter: whitespace-only lines do not count as code lines.
    const codeLines = bodyText.split(/\r?\n/).filter((line) => line.trim() !== "");
    if (codeLines.length < minLines) return;

    const start = bodyNode.getStart(sf);
    const end = bodyNode.getEnd();

    // `text` is the identifier as written; `escapedText` carries an extra
    // leading underscore for names that start with "__".
    const name = nameNode && ts.isIdentifier(nameNode) ? nameNode.text : "(anonymous)";

    out.push({
      name,
      bodyText,
      // structural signature from the body
      structKey: structuralSignature(bodyNode),
      start,
      end,
      // The parser reports 0-based lines; the report uses 1-based ones.
      startLine: sf.getLineAndCharacterOfPosition(start).line + 1,
      endLine: sf.getLineAndCharacterOfPosition(end).line + 1,
    });
  };

  // Find the name node for a function expression / arrow function, e.g.
  // `const foo = () => {}` or `{ foo: function () {} }`.
  const nameOfExpression = (node) => {
    const parent = node.parent;
    if (parent && ts.isVariableDeclaration(parent) && parent.name) {
      return parent.name;
    }
    if (parent && ts.isPropertyAssignment(parent) && ts.isIdentifier(parent.name)) {
      return parent.name;
    }
    return node.name ? node.name : null;
  };

  const visit = (node) => {
    if (ts.isFunctionDeclaration(node) && node.body) {
      addFunc(node.name ?? null, node.body);
    } else if (ts.isFunctionExpression(node) || ts.isArrowFunction(node)) {
      if (node.body) addFunc(nameOfExpression(node), node.body);
    } else if (ts.isMethodDeclaration(node) && node.body) {
      addFunc(node.name, node.body);
    }
    // Keep descending so nested functions are reported as well.
    node.forEachChild(visit);
  };

  visit(sf);
  return out;
}
|
||
|
||
/** -------- MAIN SCAN -------- */
const files = walk(path.resolve(process.cwd(), rootDir));

/** Maps from hash -> occurrences */
const groups = new Map();

/** Helper for exact hash */
import crypto from "crypto";

/** SHA-1 hex digest of a string; used only as a grouping key, not for security. */
const sha1Hex = (text) => crypto.createHash("sha1").update(text).digest("hex");

/** Hash of a body with every whitespace run collapsed to one space and the ends trimmed. */
const exactHash = (text) => sha1Hex(text.replace(/\s+/g, " ").trim());

for (const file of files) {
  try {
    const ext = path.extname(file).toLowerCase();
    const raw = fs.readFileSync(file, "utf8");
    const relPath = path.relative(process.cwd(), file);

    /** @type {Array<{virtualPath:string, code:string, lineOffset:number}>} */
    const codeUnits = [];

    if (ext === ".astro") {
      // Each extracted block is parsed on its own under a synthetic ".ts" name.
      for (const [i, block] of extractCodeFromAstro(raw).entries()) {
        codeUnits.push({
          virtualPath: file + `#astro${i + 1}.ts`,
          code: block.code,
          lineOffset: block.offset || 1,
        });
      }
    } else {
      codeUnits.push({ virtualPath: file, code: raw, lineOffset: 1 });
    }

    for (const { virtualPath, code, lineOffset } of codeUnits) {
      const sf = createSourceFile(virtualPath, code);

      for (const f of getFunctionInfo(sf, file)) {
        const key = mode === "exact" ? exactHash(f.bodyText) : sha1Hex(f.structKey);

        // lineOffset is the 1-based file line on which the unit's first line
        // sits, so it is 1 (a no-op) for whole-file units.
        const firstLine = f.startLine + lineOffset - 1;
        const lastLine = f.endLine + lineOffset - 1;
        const lineCount = f.endLine - f.startLine + 1;
        const previewHead = f.bodyText.split(/\r?\n/).slice(0, 5).join("\n");

        const item = {
          file,
          where: `${relPath}:${firstLine}-${lastLine}`,
          name: f.name,
          lines: lineCount,
          preview: previewHead + (lineCount > 5 ? "\n..." : ""),
        };

        if (!groups.has(key)) groups.set(key, []);
        groups.get(key).push(item);
      }
    }
  } catch (e) {
    // Best effort: one unreadable or unparsable file must not abort the scan.
    console.warn(`⚠️ Skipping ${file}: ${e.message}`);
  }
}
|
||
|
||
/** -------- REPORT -------- */

// Keep only hashes seen more than once, largest groups first.
const dupes = [...groups.entries()]
  .map(([key, arr]) => ({ key, items: arr }))
  .filter((g) => g.items.length > 1)
  .sort((a, b) => b.items.length - a.items.length);

// No process.exit() here: stdout writes can be asynchronous (for example when
// piped), and exiting right after console.log may cut a large report short.
// Falling off the end of the script exits with code 0 once output is flushed.
if (outputJson) {
  console.log(JSON.stringify({ mode, minLines, groups: dupes }, null, 2));
} else if (dupes.length === 0) {
  console.log(`✅ No duplicate functions found (mode=${mode}, min-lines=${minLines}).`);
} else {
  console.log(`\nFound ${dupes.length} duplicate group(s) (mode=${mode}, min-lines=${minLines}):\n`);

  dupes.forEach((g, i) => {
    console.log(`== Group ${i + 1} (${g.items.length} matches) ==`);

    // The first occurrence serves as the code sample for the whole group.
    const example = g.items[0];
    console.log(`  Sample (${example.lines} lines) from ${example.where}${example.name ? ` [${example.name}]` : ""}`);
    console.log("  ---");
    console.log(indent(example.preview, "  "));
    console.log("  ---");

    g.items.forEach((it) => {
      console.log(`  • ${it.where}${it.name ? ` [${it.name}]` : ""} (${it.lines} lines)`);
    });
    console.log();
  });
}
|
||
|
||
/**
 * Prefix every line of `s` with `pre`.
 *
 * Lines are split on "\n" only, so an empty string yields just the prefix.
 *
 * @param {string} s - Text to indent.
 * @param {string} pre - Prefix added to each line.
 * @returns {string} The indented text.
 */
function indent(s, pre) {
  return s
    .split("\n")
    .map((line) => `${pre}${line}`)
    .join("\n");
}
|