ai queue
This commit is contained in:
parent
fbe8d4d4e1
commit
1419f64ab9
@ -2,6 +2,7 @@
|
|||||||
AI_API_ENDPOINT=https://aiendpoint.org
|
AI_API_ENDPOINT=https://aiendpoint.org
|
||||||
AI_API_KEY=your_apikey_here
|
AI_API_KEY=your_apikey_here
|
||||||
AI_MODEL='ai_model_name_here'
|
AI_MODEL='ai_model_name_here'
|
||||||
|
# Minimum delay between consecutive AI API calls, in milliseconds (defaults to 1000 when unset or invalid)
AI_RATE_LIMIT_DELAY_MS=2000
|
||||||
|
|
||||||
# Git Repository
|
# Git Repository
|
||||||
GIT_REPO_URL=https://git.cc24.dev/mstoeck3/cc24-hub.git
|
GIT_REPO_URL=https://git.cc24.dev/mstoeck3/cc24-hub.git
|
||||||
|
@ -1,8 +1,9 @@
|
|||||||
// src/pages/api/ai/query.ts (MINIMAL CHANGES - Preserves exact original behavior)
|
// src/pages/api/ai/query.ts
|
||||||
import type { APIRoute } from 'astro';
|
import type { APIRoute } from 'astro';
|
||||||
import { withAPIAuth } from '../../../utils/auth.js';
|
import { withAPIAuth } from '../../../utils/auth.js';
|
||||||
import { getCompressedToolsDataForAI } from '../../../utils/dataService.js';
|
import { getCompressedToolsDataForAI } from '../../../utils/dataService.js';
|
||||||
import { apiError, apiServerError, createAuthErrorResponse } from '../../../utils/api.js'; // ONLY import specific helpers we use
|
import { apiError, apiServerError, createAuthErrorResponse } from '../../../utils/api.js';
|
||||||
|
import { enqueueApiCall } from '../../../utils/rateLimitedQueue.js';
|
||||||
|
|
||||||
export const prerender = false;
|
export const prerender = false;
|
||||||
|
|
||||||
@ -316,29 +317,30 @@ export const POST: APIRoute = async ({ request }) => {
|
|||||||
? createWorkflowSystemPrompt(toolsData)
|
? createWorkflowSystemPrompt(toolsData)
|
||||||
: createToolSystemPrompt(toolsData);
|
: createToolSystemPrompt(toolsData);
|
||||||
|
|
||||||
// AI API call (UNCHANGED)
|
const aiResponse = await enqueueApiCall(() =>
|
||||||
const aiResponse = await fetch(process.env.AI_API_ENDPOINT + '/v1/chat/completions', {
|
fetch(process.env.AI_API_ENDPOINT + '/v1/chat/completions', {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: {
|
headers: {
|
||||||
'Content-Type': 'application/json',
|
'Content-Type': 'application/json',
|
||||||
'Authorization': `Bearer ${process.env.AI_API_KEY}`
|
'Authorization': `Bearer ${process.env.AI_API_KEY}`
|
||||||
},
|
},
|
||||||
body: JSON.stringify({
|
body: JSON.stringify({
|
||||||
model: AI_MODEL,
|
model: AI_MODEL,
|
||||||
messages: [
|
messages: [
|
||||||
{
|
{
|
||||||
role: 'system',
|
role: 'system',
|
||||||
content: systemPrompt
|
content: systemPrompt
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
role: 'user',
|
role: 'user',
|
||||||
content: sanitizedQuery
|
content: sanitizedQuery
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
max_tokens: 2000,
|
max_tokens: 2000,
|
||||||
temperature: 0.3
|
temperature: 0.3
|
||||||
|
})
|
||||||
})
|
})
|
||||||
});
|
);
|
||||||
|
|
||||||
// AI response handling (ONLY CHANGE: Use helpers for error responses)
|
// AI response handling (ONLY CHANGE: Use helpers for error responses)
|
||||||
if (!aiResponse.ok) {
|
if (!aiResponse.ok) {
|
||||||
|
92
src/utils/rateLimitedQueue.ts
Normal file
92
src/utils/rateLimitedQueue.ts
Normal file
@ -0,0 +1,92 @@
|
|||||||
|
// src/utils/rateLimitedQueue.ts
|
||||||
|
// ------------------------------------------------------------
// A tiny FIFO, single-instance queue that spaces API requests by
// a configurable delay. Import `enqueueApiCall()` wherever you
// call the AI API and the queue will make sure calls are sent
// one after another with the defined pause in between.
// ------------------------------------------------------------
|
||||||
|
|
||||||
|
import dotenv from "dotenv";
|
||||||
|
|
||||||
|
dotenv.config();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Delay (in **milliseconds**) between two consecutive API calls.
|
||||||
|
*
|
||||||
|
* Configure it in your `.env` file, e.g.
|
||||||
|
* AI_RATE_LIMIT_DELAY_MS=2000
|
||||||
|
* Defaults to **1000 ms** (≈ 1 request / second) when not set or invalid.
|
||||||
|
*/
|
||||||
|
const RATE_LIMIT_DELAY_MS = Number.parseInt(process.env.AI_RATE_LIMIT_DELAY_MS ?? "1000", 10) || 1000;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Internal task type. Every task returns a Promise so callers get the
|
||||||
|
* real API response transparently.
|
||||||
|
*/
|
||||||
|
export type Task<T = unknown> = () => Promise<T>;
|
||||||
|
|
||||||
|
class RateLimitedQueue {
|
||||||
|
private queue: Task[] = [];
|
||||||
|
private processing = false;
|
||||||
|
private delayMs = RATE_LIMIT_DELAY_MS;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Schedule a task. Returns a Promise that resolves/rejects with the
|
||||||
|
* task result once the queue reaches it.
|
||||||
|
*/
|
||||||
|
add<T>(task: Task<T>): Promise<T> {
|
||||||
|
return new Promise<T>((resolve, reject) => {
|
||||||
|
this.queue.push(async () => {
|
||||||
|
try {
|
||||||
|
const result = await task();
|
||||||
|
resolve(result);
|
||||||
|
} catch (err) {
|
||||||
|
reject(err);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
this.process();
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Change the delay at runtime – e.g. if you reload env vars without
|
||||||
|
* restarting the server.
|
||||||
|
*/
|
||||||
|
setDelay(ms: number): void {
|
||||||
|
if (!Number.isFinite(ms) || ms < 0) return;
|
||||||
|
this.delayMs = ms;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------
|
||||||
|
// ️🌐 Internal helpers
|
||||||
|
// ---------------------------------------
|
||||||
|
private async process(): Promise<void> {
|
||||||
|
if (this.processing) return;
|
||||||
|
this.processing = true;
|
||||||
|
|
||||||
|
while (this.queue.length > 0) {
|
||||||
|
const next = this.queue.shift();
|
||||||
|
if (!next) continue;
|
||||||
|
await next();
|
||||||
|
// Wait before the next one
|
||||||
|
await new Promise((r) => setTimeout(r, this.delayMs));
|
||||||
|
}
|
||||||
|
|
||||||
|
this.processing = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ------------------------------------------------------------
// Export a **singleton** instance so every import shares the
// same queue. That way the rate-limit is enforced globally
// across every module that calls `enqueueApiCall()`.
// ------------------------------------------------------------
const queue = new RateLimitedQueue();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper for convenience: `enqueueApiCall(() => fetch(...))`.
|
||||||
|
*/
|
||||||
|
export function enqueueApiCall<T>(task: Task<T>): Promise<T> {
|
||||||
|
return queue.add(task);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Default export: the shared queue instance (same one `enqueueApiCall` uses).
export default queue;
|
Loading…
x
Reference in New Issue
Block a user