From 1419f64ab96edb22bfc7e954ddacfc30354c2a45 Mon Sep 17 00:00:00 2001
From: overcuriousity
Date: Fri, 25 Jul 2025 23:50:30 +0200
Subject: [PATCH] ai queue

---
 .env.example                  |  1 +
 src/pages/api/ai/query.ts     | 50 ++++++++++---------
 src/utils/rateLimitedQueue.ts | 92 +++++++++++++++++++++++++++++++++++
 3 files changed, 119 insertions(+), 24 deletions(-)
 create mode 100644 src/utils/rateLimitedQueue.ts

diff --git a/.env.example b/.env.example
index eb7304d..df4fa4a 100644
--- a/.env.example
+++ b/.env.example
@@ -2,6 +2,7 @@
 AI_API_ENDPOINT=https://aiendpoint.org
 AI_API_KEY=your_apikey_here
 AI_MODEL='ai_model_name_here'
+AI_RATE_LIMIT_DELAY_MS=2000
 
 # Git Repository
 GIT_REPO_URL=https://git.cc24.dev/mstoeck3/cc24-hub.git
diff --git a/src/pages/api/ai/query.ts b/src/pages/api/ai/query.ts
index 11380d7..2600754 100644
--- a/src/pages/api/ai/query.ts
+++ b/src/pages/api/ai/query.ts
@@ -1,8 +1,9 @@
-// src/pages/api/ai/query.ts (MINIMAL CHANGES - Preserves exact original behavior)
+// src/pages/api/ai/query.ts
 import type { APIRoute } from 'astro';
 import { withAPIAuth } from '../../../utils/auth.js';
 import { getCompressedToolsDataForAI } from '../../../utils/dataService.js';
-import { apiError, apiServerError, createAuthErrorResponse } from '../../../utils/api.js'; // ONLY import specific helpers we use
+import { apiError, apiServerError, createAuthErrorResponse } from '../../../utils/api.js';
+import { enqueueApiCall } from '../../../utils/rateLimitedQueue.js';
 
 export const prerender = false;
 
@@ -316,29 +317,30 @@ export const POST: APIRoute = async ({ request }) => {
       ? createWorkflowSystemPrompt(toolsData)
       : createToolSystemPrompt(toolsData);
 
-    // AI API call (UNCHANGED)
-    const aiResponse = await fetch(process.env.AI_API_ENDPOINT + '/v1/chat/completions', {
-      method: 'POST',
-      headers: {
-        'Content-Type': 'application/json',
-        'Authorization': `Bearer ${process.env.AI_API_KEY}`
-      },
-      body: JSON.stringify({
-        model: AI_MODEL,
-        messages: [
-          {
-            role: 'system',
-            content: systemPrompt
-          },
-          {
-            role: 'user',
-            content: sanitizedQuery
-          }
-        ],
-        max_tokens: 2000,
-        temperature: 0.3
+    const aiResponse = await enqueueApiCall(() =>
+      fetch(process.env.AI_API_ENDPOINT + '/v1/chat/completions', {
+        method: 'POST',
+        headers: {
+          'Content-Type': 'application/json',
+          'Authorization': `Bearer ${process.env.AI_API_KEY}`
+        },
+        body: JSON.stringify({
+          model: AI_MODEL,
+          messages: [
+            {
+              role: 'system',
+              content: systemPrompt
+            },
+            {
+              role: 'user',
+              content: sanitizedQuery
+            }
+          ],
+          max_tokens: 2000,
+          temperature: 0.3
+        })
       })
-    });
+    );
 
     // AI response handling (ONLY CHANGE: Use helpers for error responses)
     if (!aiResponse.ok) {
diff --git a/src/utils/rateLimitedQueue.ts b/src/utils/rateLimitedQueue.ts
new file mode 100644
index 0000000..ea1b21a
--- /dev/null
+++ b/src/utils/rateLimitedQueue.ts
@@ -0,0 +1,92 @@
+// src/utils/rateLimitedQueue.ts
+// ------------------------------------------------------------
+// A tiny FIFO, single-instance queue that spaces API requests by
+// a configurable delay. Import `enqueueApiCall()` wherever you
+// call the AI API and the queue will make sure calls are sent
+// one after another with the defined pause in-between.
+// ------------------------------------------------------------
+
+import dotenv from "dotenv";
+
+dotenv.config();
+
+/**
+ * Delay (in **milliseconds**) between two consecutive API calls.
+ *
+ * Configure it in your `.env` file, e.g.
+ *   AI_RATE_LIMIT_DELAY_MS=2000
+ * Defaults to **1000 ms** (≈ 1 request / second) when not set or invalid.
+ */
+const RATE_LIMIT_DELAY_MS = Number.parseInt(process.env.AI_RATE_LIMIT_DELAY_MS ?? "1000", 10) || 1000;
+
+/**
+ * Internal task type. Every task returns a Promise so callers get the
+ * real API response transparently.
+ */
+export type Task<T> = () => Promise<T>;
+
+class RateLimitedQueue {
+  private queue: Task<void>[] = [];
+  private processing = false;
+  private delayMs = RATE_LIMIT_DELAY_MS;
+
+  /**
+   * Schedule a task. Returns a Promise that resolves/rejects with the
+   * task result once the queue reaches it.
+   */
+  add<T>(task: Task<T>): Promise<T> {
+    return new Promise<T>((resolve, reject) => {
+      this.queue.push(async () => {
+        try {
+          const result = await task();
+          resolve(result);
+        } catch (err) {
+          reject(err);
+        }
+      });
+      this.process();
+    });
+  }
+
+  /**
+   * Change the delay at runtime, e.g. if you reload env vars without
+   * restarting the server.
+   */
+  setDelay(ms: number): void {
+    if (!Number.isFinite(ms) || ms < 0) return;
+    this.delayMs = ms;
+  }
+
+  // ---------------------------------------
+  // Internal helpers
+  // ---------------------------------------
+  private async process(): Promise<void> {
+    if (this.processing) return;
+    this.processing = true;
+
+    while (this.queue.length > 0) {
+      const next = this.queue.shift();
+      if (!next) continue;
+      await next();
+      // Wait before the next one
+      await new Promise<void>((r) => setTimeout(r, this.delayMs));
+    }
+
+    this.processing = false;
+  }
+}
+
+// ------------------------------------------------------------
+// Export a **singleton** instance so every import shares the
+// same queue. That way the rate-limit is enforced globally.
+// ------------------------------------------------------------
+const queue = new RateLimitedQueue();
+
+/**
+ * Helper for convenience: `enqueueApiCall(() => fetch(...))`.
+ */
+export function enqueueApiCall<T>(task: Task<T>): Promise<T> {
+  return queue.add(task);
+}
+
+export default queue;
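
A note on reuse: because rateLimitedQueue.ts exports a singleton, every module that imports enqueueApiCall feeds the same queue, so the delay is enforced across all call sites rather than per importer. Below is a minimal sketch of a second call site; summarize() and its file path are hypothetical, enqueueApiCall and the env variables come from the patch above, and the response shape assumes an OpenAI-compatible /v1/chat/completions API:

    // src/utils/summarize.ts (hypothetical second call site, not part of the patch)
    import { enqueueApiCall } from './rateLimitedQueue.js';

    // Calls made here and in /api/ai/query share one queue, so requests go
    // out strictly one at a time, spaced AI_RATE_LIMIT_DELAY_MS apart.
    export async function summarize(text: string): Promise<string> {
      const res = await enqueueApiCall(() =>
        fetch(process.env.AI_API_ENDPOINT + '/v1/chat/completions', {
          method: 'POST',
          headers: {
            'Content-Type': 'application/json',
            'Authorization': `Bearer ${process.env.AI_API_KEY}`
          },
          body: JSON.stringify({
            model: process.env.AI_MODEL,
            messages: [{ role: 'user', content: `Summarize briefly:\n\n${text}` }],
            max_tokens: 500,
            temperature: 0.3
          })
        })
      );
      if (!res.ok) throw new Error(`AI API request failed: ${res.status}`);
      const data = await res.json();
      // Assumes an OpenAI-compatible response shape.
      return data.choices[0].message.content;
    }

As in query.ts, a non-2xx status still resolves the enqueued promise (fetch only rejects on network errors), so each caller has to check res.ok itself.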