initial commit
scripts/ollama-monitor.py (Normal file, 164 lines)
@@ -0,0 +1,164 @@
#!/usr/bin/env python3
"""
Ollama Monitor - Real-time dashboard for Ollama instances
"""
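
# Runs with no arguments (e.g. `./scripts/ollama-monitor.py`) and refreshes the
# dashboard roughly once per second until interrupted with Ctrl+C.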

import urllib.request
import json
import subprocess
import time
import os
import sys

# Terminal colors
CLEAR, BOLD, RESET = "\033[2J\033[H", "\033[1m", "\033[0m"
CYAN, GREEN, YELLOW, MAGENTA, RED = "\033[36m", "\033[32m", "\033[33m", "\033[35m", "\033[31m"


def discover_ollama_instances():
    """Auto-discover running Ollama instances."""
    instances = {}

    # Try default port
    if check_ollama_available("http://localhost:11434"):
        instances["Ollama (default)"] = "http://localhost:11434"

    # Try common alternative ports
    for port in [11435, 11436]:
        url = f"http://localhost:{port}"
        if check_ollama_available(url):
            instances[f"Ollama (port {port})"] = url

    return instances


def check_ollama_available(url):
    """Check if an Ollama instance is available at the given URL."""
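    # /api/tags lists the locally installed models; it is used here only as a
    # cheap reachability check, where any HTTP 200 response counts as "up".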
    try:
        with urllib.request.urlopen(f"{url}/api/tags", timeout=1) as r:
            return r.status == 200
    except Exception:
        return False


def get_ollama_ps(url):
    """Get running models from an Ollama instance."""
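    # /api/ps lists the models currently loaded in memory, including each
    # model's size in bytes and its keep-alive expiry ("expires_at").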
    try:
        with urllib.request.urlopen(f"{url}/api/ps", timeout=0.5) as r:
            return json.loads(r.read().decode()).get('models', [])
    except Exception:
        return None


def get_gpu_metrics():
    """Try to get GPU metrics from AMD GPU sysfs."""
    try:
        # Try multiple possible GPU device paths
        device_paths = [
            "/sys/class/drm/card1/device/",
            "/sys/class/drm/card0/device/",
        ]

        for base_path in device_paths:
            if not os.path.exists(base_path):
                continue

            try:
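                # amdgpu exposes the VRAM counters in bytes and gpu_busy_percent
                # as an integer 0-100; VRAM values are converted to MiB below.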
                with open(base_path + "mem_info_vram_used", "r") as f:
                    used = int(f.read().strip()) / 1024 / 1024
                with open(base_path + "mem_info_vram_total", "r") as f:
                    total = int(f.read().strip()) / 1024 / 1024
                with open(base_path + "gpu_busy_percent", "r") as f:
                    load = int(f.read().strip())

                # Sanity check: if VRAM usage is low but load reads 99%, it's a driver glitch
                if load == 99 and used < (total * 0.1):
                    load = 0

                return used, total, load
            except Exception:
                continue

        return None, None, None
    except Exception:
        return None, None, None


def get_sys_metrics():
    """Get system CPU and RAM metrics."""
    try:
        load_avg = os.getloadavg()[0]
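        # `free -m` row 1 is the "Mem:" line; after splitting on whitespace,
        # field 1 is total RAM in MB and field 2 is used RAM in MB.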
        mem_output = subprocess.check_output("free -m", shell=True).decode().split('\n')[1].split()
        ram_used = int(mem_output[2])
        ram_total = int(mem_output[1])
        return load_avg, ram_used, ram_total
    except Exception:
        return 0.0, 0, 0


def draw(instances):
    """Draw the monitoring dashboard."""
    load_avg, ram_used, ram_total = get_sys_metrics()
    vram_used, vram_total, gpu_load = get_gpu_metrics()

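    # CLEAR ("\033[2J\033[H") wipes the terminal and homes the cursor, so each
    # refresh redraws the dashboard in place instead of scrolling.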
    out = [f"{CLEAR}{BOLD}{CYAN}=== OLLAMA MONITOR ==={RESET}"]

    # System metrics
    out.append(f"{BOLD}CPU Load:{RESET} {YELLOW}{load_avg:.2f}{RESET} | "
               f"{BOLD}RAM:{RESET} {MAGENTA}{ram_used}MB/{ram_total}MB{RESET}")

    # GPU metrics (if available)
    if vram_total is not None and gpu_load is not None:
        load_color = GREEN if gpu_load < 80 else RED
        out.append(f"{BOLD}GPU Load:{RESET} {load_color}{gpu_load}%{RESET} | "
                   f"{BOLD}VRAM:{RESET} {CYAN}{vram_used:.0f}MB/{vram_total:.0f}MB{RESET}")

    out.append("─" * 70)

    # Ollama instances
    for name, url in instances.items():
        models = get_ollama_ps(url)
        status = f"{GREEN}ONLINE{RESET}" if models is not None else f"{RED}OFFLINE{RESET}"
        out.append(f"\n{BOLD}{name}{RESET} [{status}] - {url}")

        if models is None:
            out.append(f"  {RED}Connection failed{RESET}")
        elif models:
            out.append(f"  {'MODEL':<40} {'SIZE':<12} {'UNTIL':<20}")
            for m in models:
                size_gb = m.get('size', 0) / (1024**3)
                until = m.get('expires_at', 'N/A')
                if until != 'N/A' and 'T' in until:
                    # Show just the time-of-day portion of the ISO timestamp
                    until = until.split('T')[1].split('.')[0]

                out.append(f"  {m['name'][:39]:<40} {size_gb:>6.1f} GB   {until}")
        else:
            out.append(f"  {YELLOW}IDLE{RESET}")

    print("\n".join(out) + f"\n\n{BOLD}{CYAN}Refreshing... (Ctrl+C to quit){RESET}")


def main():
    print("Discovering Ollama instances...")
    instances = discover_ollama_instances()

    if not instances:
        print(f"{RED}✗ No Ollama instances found.{RESET}")
        print("  Make sure Ollama is running on the default port (11434)")
        sys.exit(1)

    print(f"Found {len(instances)} instance(s). Starting monitor...\n")
    time.sleep(1)

    try:
        while True:
            draw(instances)
            time.sleep(1)
    except KeyboardInterrupt:
        print("\nMonitor stopped.")


if __name__ == "__main__":
    main()