improvements to graphs

This commit is contained in:
overcuriousity
2026-01-17 23:12:24 +01:00
parent 7a76f60bfe
commit 1fc952c811
2 changed files with 305 additions and 75 deletions

View File

@@ -362,6 +362,26 @@ class WebInterface:
test_results = []
for test in results.get('test_results', []):
score = test.get('score') or test.get('overall_score')
# Handle notes differently for multi-turn vs single-turn tests
if test.get('type') == 'multi_turn' and 'turns' in test:
# Combine notes from all turns for multi-turn tests
turn_notes = []
for turn in test.get('turns', []):
turn_num = turn.get('turn', '?')
turn_note = turn.get('notes', '')
if turn_note:
turn_notes.append(f"T{turn_num}: {turn_note}")
notes = ' | '.join(turn_notes) if turn_notes else ''
# Get aggregate generation time and metrics for multi-turn
total_gen_time = sum(t.get('generation_time', 0) for t in test.get('turns', []))
api_metrics = test.get('aggregate_metrics', {})
else:
notes = test.get('notes', '')
total_gen_time = test.get('generation_time')
api_metrics = test.get('api_metrics')
test_data = {
'test_id': test.get('test_id'),
'test_name': test.get('test_name'),
@@ -370,7 +390,9 @@ class WebInterface:
'difficulty': test.get('difficulty', 'medium'),
'score': score,
'status': test.get('status'),
'notes': test.get('notes', '')
'notes': notes,
'generation_time': total_gen_time,
'api_metrics': api_metrics
}
test_results.append(test_data)
@@ -688,6 +710,29 @@ class WebInterface:
box-shadow: 0 4px 15px var(--shadow-hover);
}
/* Scale toggle button: absolutely positioned pill-shaped control.
   NOTE(review): the 140px right offset presumably leaves room for the
   theme toggle button next to it - confirm against the .theme-toggle rules. */
.scale-toggle {
position: absolute;
top: 30px;
right: 140px;
background: var(--border-color);
border: none;
padding: 10px 20px;
border-radius: 20px;
cursor: pointer;
font-size: 1em;
transition: all 0.3s;
}
/* Hover feedback: slight enlargement plus a drop shadow (animated by the
   0.3s transition declared on .scale-toggle). */
.scale-toggle:hover {
transform: scale(1.05);
box-shadow: 0 4px 15px var(--shadow-hover);
}
/* Active state: the "zoomed" class is added by the scale toggle script while
   the zoomed view is enabled. */
.scale-toggle.zoomed {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
}
h1 {
font-size: 2.5em;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
@@ -1000,6 +1045,7 @@ class WebInterface:
<body>
<div class="container">
<header>
<button class="scale-toggle" id="scaleToggle" onclick="toggleScale()" title="Toggle between full scale (0-5) and zoomed view for better distinction">🔍 Full Scale</button>
<button class="theme-toggle" onclick="toggleTheme()">🌓 Toggle Dark Mode</button>
<h1>🧠 LLM Evaluation Dashboard</h1>
<p class="subtitle">Comprehensive Intelligence & Performance Analysis</p>
@@ -1095,6 +1141,8 @@ class WebInterface:
let statisticsData = null;
let intelligenceData = null;
let currentModelDetails = null;
// true while charts use the zoomed (data-fitted) axis range instead of the
// fixed 0-5 range; flipped by toggleScale() and read by getScaleOptions().
let zoomedScale = false;
// Chart instance currently drawn on the overview canvas, kept so it can be
// destroyed before the chart is re-created.
let overviewChartInstance = null;
// Theme toggle functionality
function toggleTheme() {
@@ -1111,6 +1159,81 @@ class WebInterface:
}
}
// Flip between the full (0-5) and zoomed chart scale, update the toggle
// button to show the active mode, remember the choice in localStorage and
// redraw the charts.
function toggleScale() {
    zoomedScale = !zoomedScale;

    const toggleButton = document.getElementById('scaleToggle');
    toggleButton.textContent = zoomedScale ? '🔎 Zoomed' : '🔍 Full Scale';
    if (zoomedScale) {
        toggleButton.classList.add('zoomed');
    } else {
        toggleButton.classList.remove('zoomed');
    }

    const storedValue = zoomedScale ? 'enabled' : 'disabled';
    localStorage.setItem('zoomedScale', storedValue);

    // Every chart has to be rebuilt so it picks up the new axis range.
    refreshAllCharts();
}
// Restore the scale mode saved in localStorage. Only the 'enabled' value has
// an effect: it switches to the zoomed scale and marks the toggle button.
function loadScalePreference() {
    if (localStorage.getItem('zoomedScale') !== 'enabled') {
        return;
    }

    zoomedScale = true;
    const toggleButton = document.getElementById('scaleToggle');
    toggleButton.textContent = '🔎 Zoomed';
    toggleButton.classList.add('zoomed');
}
// Build the Chart.js `scales` option for the current scale mode.
//
// Full-scale mode (zoomedScale === false) always yields a fixed 0-5 axis.
// Zoomed mode fits the axis around the usable values in `data`, padded by
// 20% of their spread (at least 0.2), rounded outward to one decimal and
// clamped to the 0-5 band.
//
// Parameters:
//   data    - array-like/iterable of plotted values. null, undefined, NaN and
//             non-finite entries are ignored; null/undefined `data` is
//             treated as empty.
//   isRadar - when true the result is keyed by `r` (radial axis), otherwise
//             by `y`.
//
// Returns an object with a single axis key (`y` or `r`) holding either
// { beginAtZero: true, max: 5 } or { min, max, beginAtZero: false }.
// The fixed 0-5 axis is also returned in zoomed mode when there is nothing
// usable to fit, or when the data lies outside 0-5 so that no valid window
// (min < max) exists inside the band.
function getScaleOptions(data, isRadar = false) {
    const axisKey = isRadar ? 'r' : 'y';
    const fullScale = { [axisKey]: { beginAtZero: true, max: 5 } };

    if (!zoomedScale) {
        return fullScale;
    }

    // Coerce exactly like Math.min/Math.max would, but also drop +/-Infinity,
    // which would otherwise turn into a non-finite axis bound.
    const values = (data == null ? [] : Array.from(data))
        .filter((d) => d !== null && d !== undefined)
        .map(Number)
        .filter((d) => Number.isFinite(d));

    if (values.length === 0) {
        return fullScale;
    }

    const minVal = Math.min(...values);
    const maxVal = Math.max(...values);
    const padding = Math.max((maxVal - minVal) * 0.2, 0.2); // At least 0.2 padding

    let min = Math.max(0, Math.floor((minVal - padding) * 10) / 10);
    let max = Math.min(5, Math.ceil((maxVal + padding) * 10) / 10);

    // Nearly identical values: widen the window so bars stay distinguishable.
    if (max - min < 0.5) {
        min = Math.max(0, minVal - 0.3);
        max = Math.min(5, maxVal + 0.3);
    }

    // Values entirely above 5 or below 0 make the clamps cross; an inverted
    // axis is useless, so fall back to the fixed range.
    if (!(min < max)) {
        return fullScale;
    }

    return { [axisKey]: { min, max, beginAtZero: false } };
}
// Redraw every scale-dependent chart after the scale mode changed.
// Does nothing until comparison data has been loaded.
function refreshAllCharts() {
    if (!comparisonData) {
        return;
    }

    refreshOverviewChart();
    updateComparisonChart();
    updateCategoryChart();
}
// Tab switching
function switchTab(tabName) {
document.querySelectorAll('.tab').forEach(t => t.classList.remove('active'));
@@ -1123,6 +1246,7 @@ class WebInterface:
// Initialize dashboard
async function initDashboard() {
loadThemePreference();
loadScalePreference();
await loadOverview();
await loadComparison();
await loadStatistics();
@@ -1163,30 +1287,42 @@ class WebInterface:
document.getElementById('overviewStats').innerHTML = statsHtml;
// Create overview chart
const ctx = document.getElementById('overviewChart').getContext('2d');
new Chart(ctx, {
type: 'bar',
data: {
labels: models,
datasets: [{
label: 'Average Score',
data: models.map(m => comparisonData.models[m].overall_stats.average || 0),
backgroundColor: 'rgba(102, 126, 234, 0.6)',
borderColor: 'rgba(102, 126, 234, 1)',
borderWidth: 2
}]
},
options: {
responsive: true,
maintainAspectRatio: false,
scales: {
y: {
beginAtZero: true,
max: 5
}
}
}
});
refreshOverviewChart();
} catch (error) {
console.error('Error loading overview:', error);
}
}
function refreshOverviewChart() {
if (!comparisonData) return;
const models = Object.keys(comparisonData.models);
const data = models.map(m => comparisonData.models[m].overall_stats.average || 0);
if (overviewChartInstance) {
overviewChartInstance.destroy();
}
const ctx = document.getElementById('overviewChart').getContext('2d');
overviewChartInstance = new Chart(ctx, {
type: 'bar',
data: {
labels: models,
datasets: [{
label: 'Average Score',
data: data,
backgroundColor: 'rgba(102, 126, 234, 0.6)',
borderColor: 'rgba(102, 126, 234, 1)',
borderWidth: 2
}]
},
options: {
responsive: true,
maintainAspectRatio: false,
scales: getScaleOptions(data)
}
});
} catch (error) {
console.error('Error loading overview:', error);
@@ -1240,11 +1376,7 @@ class WebInterface:
options: {
responsive: true,
maintainAspectRatio: false,
scales: {
r: {
beginAtZero: true
}
}
scales: getScaleOptions(data, true)
}
});
}
@@ -1465,12 +1597,7 @@ class WebInterface:
options: {
responsive: true,
maintainAspectRatio: false,
scales: {
y: {
beginAtZero: true,
max: 5
}
}
scales: getScaleOptions(data)
}
});
}
@@ -1535,7 +1662,7 @@ class WebInterface:
<td>${genTime}</td>
<td>${tokenInfo}${tokensPerSec}</td>
<td>${test.status}</td>
<td><small>${test.notes}</small></td>
<td><small>${test.notes || ''}</small></td>
</tr>
`;
});