improve AI

This commit is contained in:
overcuriousity
2025-08-01 22:29:38 +02:00
parent 1b9d9b437b
commit 8693cd87d4
6 changed files with 426 additions and 269 deletions

View File

@@ -1,4 +1,5 @@
// src/pages/api/ai/query.ts - Enhanced for micro-task pipeline
// src/pages/api/ai/query.ts - FIXED: Rate limiting for micro-task pipeline
import type { APIRoute } from 'astro';
import { withAPIAuth } from '../../../utils/auth.js';
import { apiError, apiServerError, createAuthErrorResponse } from '../../../utils/api.js';
@@ -7,79 +8,94 @@ import { aiPipeline } from '../../../utils/aiPipeline.js';
export const prerender = false;

// Per-user rate-limit bookkeeping. One record tracks both the main
// request count and the cumulative micro-task (AI call) count for the
// current window, so a single store/reset covers both limits.
interface RateLimitData {
  count: number;          // main POST requests in the current window
  resetTime: number;      // epoch ms when this window expires
  microTaskCount: number; // AI micro-task calls consumed in the window
}

const rateLimitStore = new Map<string, RateLimitData>();

// Rate limiting for the micro-task architecture: one user request fans
// out into many small AI calls, so the main request cap is low and a
// separate per-minute budget caps total AI calls.
const RATE_LIMIT_WINDOW = 60 * 1000; // 1 minute
const MAIN_RATE_LIMIT_MAX = parseInt(process.env.AI_RATE_LIMIT_MAX_REQUESTS || '4', 10);
const MICRO_TASK_TOTAL_LIMIT = parseInt(process.env.AI_MICRO_TASK_TOTAL_LIMIT || '50', 10);
/**
 * Sanitize untrusted user input before it reaches the AI pipeline.
 * Strips code blocks, HTML tags, chat-role prefixes and common
 * prompt-injection phrases, then caps length at 2000 chars and
 * collapses whitespace.
 */
function sanitizeInput(input: string): string {
  let sanitized = input
    .replace(/```[\s\S]*?```/g, '[CODE_BLOCK_REMOVED]') // fenced code blocks
    .replace(/\<\/?[^>]+(>|$)/g, '')                    // HTML tags
    .replace(/\b(system|assistant|user)\s*[:]/gi, '[ROLE_REMOVED]') // chat-role markers
    .replace(/\b(ignore|forget|disregard)\s+(previous|all|your)\s+(instructions?|context|rules?)/gi, '[INSTRUCTION_REMOVED]')
    .trim();

  // Hard cap on length, then normalize all runs of whitespace to one space.
  sanitized = sanitized.slice(0, 2000).replace(/\s+/g, ' ');

  return sanitized;
}
function checkRateLimit(userId: string): boolean {
function checkRateLimit(userId: string): { allowed: boolean; reason?: string; microTasksRemaining?: number } {
const now = Date.now();
const userLimit = rateLimitStore.get(userId);
if (!userLimit || now > userLimit.resetTime) {
rateLimitStore.set(userId, { count: 1, resetTime: now + RATE_LIMIT_WINDOW });
return true;
rateLimitStore.set(userId, {
count: 1,
resetTime: now + RATE_LIMIT_WINDOW,
microTaskCount: 0
});
return {
allowed: true,
microTasksRemaining: MICRO_TASK_TOTAL_LIMIT
};
}
if (userLimit.count >= RATE_LIMIT_MAX) {
return false;
if (userLimit.count >= MAIN_RATE_LIMIT_MAX) {
return {
allowed: false,
reason: `Main rate limit exceeded. Max ${MAIN_RATE_LIMIT_MAX} requests per minute.`
};
}
if (userLimit.microTaskCount >= MICRO_TASK_TOTAL_LIMIT) {
return {
allowed: false,
reason: `Micro-task limit exceeded. Max ${MICRO_TASK_TOTAL_LIMIT} AI calls per minute.`
};
}
userLimit.count++;
return true;
return {
allowed: true,
microTasksRemaining: MICRO_TASK_TOTAL_LIMIT - userLimit.microTaskCount
};
}
// Enhanced: Check micro-task rate limiting
function checkMicroTaskRateLimit(userId: string): { allowed: boolean; remaining: number } {
const now = Date.now();
const userLimit = microTaskRateLimitStore.get(userId);
if (!userLimit || now > userLimit.resetTime) {
microTaskRateLimitStore.set(userId, { count: 1, resetTime: now + RATE_LIMIT_WINDOW });
return { allowed: true, remaining: MICRO_TASK_RATE_LIMIT - 1 };
function incrementMicroTaskCount(userId: string, aiCallsMade: number): void {
const userLimit = rateLimitStore.get(userId);
if (userLimit) {
userLimit.microTaskCount += aiCallsMade;
console.log(`[RATE LIMIT] User ${userId} now at ${userLimit.microTaskCount}/${MICRO_TASK_TOTAL_LIMIT} micro-task calls`);
}
if (userLimit.count >= MICRO_TASK_RATE_LIMIT) {
return { allowed: false, remaining: 0 };
}
userLimit.count++;
return { allowed: true, remaining: MICRO_TASK_RATE_LIMIT - userLimit.count };
}
function cleanupExpiredRateLimits() {
const now = Date.now();
const maxStoreSize = 1000;
// Clean up main rate limits
for (const [userId, limit] of rateLimitStore.entries()) {
if (now > limit.resetTime) {
rateLimitStore.delete(userId);
}
}
// Clean up micro-task rate limits
for (const [userId, limit] of microTaskRateLimitStore.entries()) {
if (now > limit.resetTime) {
microTaskRateLimitStore.delete(userId);
}
if (rateLimitStore.size > maxStoreSize) {
const entries = Array.from(rateLimitStore.entries());
entries.sort((a, b) => a[1].resetTime - b[1].resetTime);
const toRemove = entries.slice(0, entries.length - maxStoreSize);
toRemove.forEach(([userId]) => rateLimitStore.delete(userId));
console.log(`[RATE LIMIT] Cleanup: removed ${toRemove.length} old entries`);
}
}
@@ -94,24 +110,16 @@ export const POST: APIRoute = async ({ request }) => {
const userId = authResult.userId;
// Check main rate limit
if (!checkRateLimit(userId)) {
return apiError.rateLimit('Rate limit exceeded');
}
// Enhanced: Check micro-task rate limit
const microTaskLimit = checkMicroTaskRateLimit(userId);
if (!microTaskLimit.allowed) {
return apiError.rateLimit(
`Micro-task rate limit exceeded. The new AI pipeline uses multiple smaller requests. Please wait before trying again.`
);
const rateLimitResult = checkRateLimit(userId);
if (!rateLimitResult.allowed) {
return apiError.rateLimit(rateLimitResult.reason || 'Rate limit exceeded');
}
const body = await request.json();
const { query, mode = 'workflow', taskId: clientTaskId } = body;
console.log(`[MICRO-TASK API] Received request - TaskId: ${clientTaskId}, Mode: ${mode}, Query length: ${query?.length || 0}`);
console.log(`[MICRO-TASK API] Micro-task rate limit remaining: ${microTaskLimit.remaining}`);
console.log(`[MICRO-TASK API] Micro-task calls remaining: ${rateLimitResult.microTasksRemaining}`);
if (!query || typeof query !== 'string') {
console.log(`[MICRO-TASK API] Invalid query for task ${clientTaskId}`);
@@ -133,7 +141,6 @@ export const POST: APIRoute = async ({ request }) => {
console.log(`[MICRO-TASK API] About to enqueue micro-task pipeline ${taskId}`);
// Use the enhanced micro-task AI pipeline
const result = await enqueueApiCall(() =>
aiPipeline.processQuery(sanitizedQuery, mode)
, taskId);
@@ -142,8 +149,10 @@ export const POST: APIRoute = async ({ request }) => {
return apiServerError.unavailable('No response from micro-task AI pipeline');
}
// Enhanced: Log micro-task statistics
const stats = result.processingStats;
const estimatedAICallsMade = stats.microTasksCompleted + stats.microTasksFailed;
incrementMicroTaskCount(userId, estimatedAICallsMade);
console.log(`[MICRO-TASK API] Pipeline completed for ${taskId}:`);
console.log(` - Mode: ${mode}`);
console.log(` - User: ${userId}`);
@@ -151,10 +160,14 @@ export const POST: APIRoute = async ({ request }) => {
console.log(` - Processing time: ${stats.processingTimeMs}ms`);
console.log(` - Micro-tasks completed: ${stats.microTasksCompleted}`);
console.log(` - Micro-tasks failed: ${stats.microTasksFailed}`);
console.log(` - Estimated AI calls: ${estimatedAICallsMade}`);
console.log(` - Embeddings used: ${stats.embeddingsUsed}`);
console.log(` - Final items: ${stats.finalSelectedItems}`);
// Enhanced: Include pipeline information in response
const currentLimit = rateLimitStore.get(userId);
const remainingMicroTasks = currentLimit ?
MICRO_TASK_TOTAL_LIMIT - currentLimit.microTaskCount : MICRO_TASK_TOTAL_LIMIT;
return new Response(JSON.stringify({
success: true,
mode,
@@ -163,14 +176,14 @@ export const POST: APIRoute = async ({ request }) => {
query: sanitizedQuery,
processingStats: {
...result.processingStats,
// Add micro-task specific info
pipelineType: 'micro-task',
microTasksSuccessRate: stats.microTasksCompleted / (stats.microTasksCompleted + stats.microTasksFailed),
averageTaskTime: stats.processingTimeMs / (stats.microTasksCompleted + stats.microTasksFailed)
averageTaskTime: stats.processingTimeMs / (stats.microTasksCompleted + stats.microTasksFailed),
estimatedAICallsMade
},
// Enhanced: Rate limiting info for client
rateLimitInfo: {
remaining: microTaskLimit.remaining,
mainRequestsRemaining: MAIN_RATE_LIMIT_MAX - (currentLimit?.count || 0),
microTaskCallsRemaining: remainingMicroTasks,
resetTime: Date.now() + RATE_LIMIT_WINDOW
}
}), {
@@ -181,15 +194,14 @@ export const POST: APIRoute = async ({ request }) => {
} catch (error) {
console.error('[MICRO-TASK API] Pipeline error:', error);
// Enhanced: More specific error messages for micro-task pipeline
if (error.message.includes('embeddings')) {
return apiServerError.unavailable('Embeddings service error - falling back to selector AI');
return apiServerError.unavailable('Embeddings service error - using AI fallback');
} else if (error.message.includes('micro-task')) {
return apiServerError.unavailable('Micro-task pipeline error - some analysis steps may have failed');
return apiServerError.unavailable('Micro-task pipeline error - some analysis steps failed');
} else if (error.message.includes('selector')) {
return apiServerError.unavailable('AI selector service error');
} else if (error.message.includes('rate limit')) {
return apiError.rateLimit('AI service rate limits exceeded due to micro-task processing');
return apiError.rateLimit('AI service rate limits exceeded during micro-task processing');
} else {
return apiServerError.internal('Micro-task AI pipeline error');
}