improvements to graphs
This commit is contained in:
@@ -362,6 +362,26 @@ class WebInterface:
|
||||
test_results = []
|
||||
for test in results.get('test_results', []):
|
||||
score = test.get('score') or test.get('overall_score')
|
||||
|
||||
# Handle notes differently for multi-turn vs single-turn tests
|
||||
if test.get('type') == 'multi_turn' and 'turns' in test:
|
||||
# Combine notes from all turns for multi-turn tests
|
||||
turn_notes = []
|
||||
for turn in test.get('turns', []):
|
||||
turn_num = turn.get('turn', '?')
|
||||
turn_note = turn.get('notes', '')
|
||||
if turn_note:
|
||||
turn_notes.append(f"T{turn_num}: {turn_note}")
|
||||
notes = ' | '.join(turn_notes) if turn_notes else ''
|
||||
|
||||
# Get aggregate generation time and metrics for multi-turn
|
||||
total_gen_time = sum(t.get('generation_time', 0) for t in test.get('turns', []))
|
||||
api_metrics = test.get('aggregate_metrics', {})
|
||||
else:
|
||||
notes = test.get('notes', '')
|
||||
total_gen_time = test.get('generation_time')
|
||||
api_metrics = test.get('api_metrics')
|
||||
|
||||
test_data = {
|
||||
'test_id': test.get('test_id'),
|
||||
'test_name': test.get('test_name'),
|
||||
@@ -370,7 +390,9 @@ class WebInterface:
|
||||
'difficulty': test.get('difficulty', 'medium'),
|
||||
'score': score,
|
||||
'status': test.get('status'),
|
||||
'notes': test.get('notes', '')
|
||||
'notes': notes,
|
||||
'generation_time': total_gen_time,
|
||||
'api_metrics': api_metrics
|
||||
}
|
||||
test_results.append(test_data)
|
||||
|
||||
@@ -688,6 +710,29 @@ class WebInterface:
|
||||
box-shadow: 0 4px 15px var(--shadow-hover);
|
||||
}
|
||||
|
||||
/* Scale toggle button: absolutely positioned, 30px from the top and 140px
   from the right edge of its positioned ancestor. */
.scale-toggle {
    /* Placement */
    position: absolute;
    right: 140px;
    top: 30px;

    /* Appearance */
    border: none;
    border-radius: 20px;
    background: var(--border-color);
    font-size: 1em;
    padding: 10px 20px;

    /* Interaction */
    cursor: pointer;
    transition: all 0.3s;
}

/* Hover: enlarge slightly and add a shadow; the transition above animates it. */
.scale-toggle:hover {
    box-shadow: 0 4px 15px var(--shadow-hover);
    transform: scale(1.05);
}

/* Applied while the "zoomed" class is present: gradient background, white text. */
.scale-toggle.zoomed {
    color: white;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
}
|
||||
|
||||
h1 {
|
||||
font-size: 2.5em;
|
||||
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
||||
@@ -1000,6 +1045,7 @@ class WebInterface:
|
||||
<body>
|
||||
<div class="container">
|
||||
<header>
|
||||
<button class="scale-toggle" id="scaleToggle" onclick="toggleScale()" title="Toggle between full scale (0-5) and zoomed view for better distinction">🔍 Full Scale</button>
|
||||
<button class="theme-toggle" onclick="toggleTheme()">🌓 Toggle Dark Mode</button>
|
||||
<h1>🧠 LLM Evaluation Dashboard</h1>
|
||||
<p class="subtitle">Comprehensive Intelligence & Performance Analysis</p>
|
||||
@@ -1095,6 +1141,8 @@ class WebInterface:
|
||||
let statisticsData = null;
|
||||
let intelligenceData = null;
|
||||
let currentModelDetails = null;
|
||||
let zoomedScale = false;
|
||||
let overviewChartInstance = null;
|
||||
|
||||
// Theme toggle functionality
|
||||
function toggleTheme() {
|
||||
@@ -1111,6 +1159,81 @@ class WebInterface:
|
||||
}
|
||||
}
|
||||
|
||||
// Scale toggle functionality
|
||||
/**
 * Flip the chart scale mode between full scale and zoomed view.
 *
 * Side effects, in order:
 *  1. inverts the shared `zoomedScale` flag;
 *  2. updates the label and `zoomed` class of the `#scaleToggle` button
 *     (skipped when the button is not in the document);
 *  3. stores the choice in localStorage under `zoomedScale` as
 *     'enabled' / 'disabled' (a storage failure is logged, not thrown);
 *  4. calls refreshAllCharts() so the charts pick up the new mode.
 */
function toggleScale() {
    zoomedScale = !zoomedScale;

    // A missing button must not stop the charts from being refreshed.
    const btn = document.getElementById('scaleToggle');
    if (btn) {
        btn.textContent = zoomedScale ? '🔎 Zoomed' : '🔍 Full Scale';
        btn.classList.toggle('zoomed', zoomedScale);
    }

    // setItem can throw (storage disabled or quota exceeded); persisting the
    // preference is best-effort and must not block the redraw below.
    try {
        localStorage.setItem('zoomedScale', zoomedScale ? 'enabled' : 'disabled');
    } catch (error) {
        console.warn('Could not save scale preference:', error);
    }

    // Refresh all charts with new scale
    refreshAllCharts();
}
|
||||
|
||||
// Load scale preference
|
||||
/**
 * Restore the saved scale mode.
 *
 * Reads the `zoomedScale` key from localStorage. When the stored value is
 * 'enabled', sets the shared `zoomedScale` flag to true and puts the
 * `#scaleToggle` button into its zoomed appearance. Any other value (or an
 * unreadable storage) leaves the default full-scale mode untouched.
 */
function loadScalePreference() {
    let savedScale = null;
    // getItem can throw when storage access is blocked; fall back to the
    // default mode instead of propagating the error to the caller.
    try {
        savedScale = localStorage.getItem('zoomedScale');
    } catch (error) {
        console.warn('Could not read scale preference:', error);
    }

    if (savedScale !== 'enabled') {
        return;
    }

    zoomedScale = true;

    // The flag is still restored when the button is not in the document.
    const btn = document.getElementById('scaleToggle');
    if (btn) {
        btn.textContent = '🔎 Zoomed';
        btn.classList.add('zoomed');
    }
}
|
||||
|
||||
// Calculate optimal Y-axis range for zoomed view
|
||||
/**
 * Build the value for a chart's `scales` option from the plotted values.
 *
 * Reads the shared `zoomedScale` flag:
 *  - flag off, or no usable values: the fixed full scale (0 to 5);
 *  - flag on: a range fitted around the values, padded by 20% of their
 *    spread (at least 0.2) and kept inside 0..5.
 *
 * @param {Array<number|null|undefined>} data - Values shown on the axis.
 *     null, undefined and non-finite entries are ignored; entries outside
 *     0..5 are clamped so the computed range can never be inverted.
 *     A non-array argument is treated as "no data".
 * @param {boolean} [isRadar=false] - When true the options are keyed by
 *     `r` (radial axis) instead of `y`.
 * @returns {Object} `{ y: {...} }` or `{ r: {...} }` holding either
 *     `{ beginAtZero: true, max: 5 }` or `{ min, max, beginAtZero: false }`.
 */
function getScaleOptions(data, isRadar = false) {
    const SCORE_MIN = 0;
    const SCORE_MAX = 5;
    const axis = isRadar ? 'r' : 'y';
    const fullScale = { [axis]: { beginAtZero: true, max: SCORE_MAX } };

    if (!zoomedScale) {
        // Full scale: 0 to 5
        return fullScale;
    }

    // Keep only finite numbers (Infinity would otherwise produce an infinite
    // axis bound) and clamp them to the score range so min stays below max.
    const validData = (Array.isArray(data) ? data : [])
        .filter((d) => d !== null && d !== undefined)
        .map((d) => Number(d))
        .filter((d) => Number.isFinite(d))
        .map((d) => Math.min(SCORE_MAX, Math.max(SCORE_MIN, d)));

    if (validData.length === 0) {
        return fullScale;
    }

    const minVal = Math.min(...validData);
    const maxVal = Math.max(...validData);
    const padding = Math.max((maxVal - minVal) * 0.2, 0.2); // At least 0.2 padding

    // Snap the padded bounds outward to one decimal place.
    let min = Math.max(SCORE_MIN, Math.floor((minVal - padding) * 10) / 10);
    let max = Math.min(SCORE_MAX, Math.ceil((maxVal + padding) * 10) / 10);

    // Ensure we have at least some range
    if (max - min < 0.5) {
        min = Math.max(SCORE_MIN, minVal - 0.3);
        max = Math.min(SCORE_MAX, maxVal + 0.3);
    }

    return { [axis]: { min, max, beginAtZero: false } };
}
|
||||
|
||||
// Refresh all charts when scale changes
|
||||
/**
 * Redraw the charts after the scale mode has changed.
 *
 * Does nothing while `comparisonData` is still falsy; otherwise calls the
 * overview, comparison and category chart refreshers in that order.
 */
function refreshAllCharts() {
    if (!comparisonData) {
        return;
    }

    refreshOverviewChart();
    updateComparisonChart();
    updateCategoryChart();
}
|
||||
|
||||
// Tab switching
|
||||
function switchTab(tabName) {
|
||||
document.querySelectorAll('.tab').forEach(t => t.classList.remove('active'));
|
||||
@@ -1123,6 +1246,7 @@ class WebInterface:
|
||||
// Initialize dashboard
|
||||
async function initDashboard() {
|
||||
loadThemePreference();
|
||||
loadScalePreference();
|
||||
await loadOverview();
|
||||
await loadComparison();
|
||||
await loadStatistics();
|
||||
@@ -1163,14 +1287,31 @@ class WebInterface:
|
||||
document.getElementById('overviewStats').innerHTML = statsHtml;
|
||||
|
||||
// Create overview chart
|
||||
refreshOverviewChart();
|
||||
|
||||
} catch (error) {
|
||||
console.error('Error loading overview:', error);
|
||||
}
|
||||
}
|
||||
|
||||
function refreshOverviewChart() {
|
||||
if (!comparisonData) return;
|
||||
|
||||
const models = Object.keys(comparisonData.models);
|
||||
const data = models.map(m => comparisonData.models[m].overall_stats.average || 0);
|
||||
|
||||
if (overviewChartInstance) {
|
||||
overviewChartInstance.destroy();
|
||||
}
|
||||
|
||||
const ctx = document.getElementById('overviewChart').getContext('2d');
|
||||
new Chart(ctx, {
|
||||
overviewChartInstance = new Chart(ctx, {
|
||||
type: 'bar',
|
||||
data: {
|
||||
labels: models,
|
||||
datasets: [{
|
||||
label: 'Average Score',
|
||||
data: models.map(m => comparisonData.models[m].overall_stats.average || 0),
|
||||
data: data,
|
||||
backgroundColor: 'rgba(102, 126, 234, 0.6)',
|
||||
borderColor: 'rgba(102, 126, 234, 1)',
|
||||
borderWidth: 2
|
||||
@@ -1179,12 +1320,7 @@ class WebInterface:
|
||||
options: {
|
||||
responsive: true,
|
||||
maintainAspectRatio: false,
|
||||
scales: {
|
||||
y: {
|
||||
beginAtZero: true,
|
||||
max: 5
|
||||
}
|
||||
}
|
||||
scales: getScaleOptions(data)
|
||||
}
|
||||
});
|
||||
|
||||
@@ -1240,11 +1376,7 @@ class WebInterface:
|
||||
options: {
|
||||
responsive: true,
|
||||
maintainAspectRatio: false,
|
||||
scales: {
|
||||
r: {
|
||||
beginAtZero: true
|
||||
}
|
||||
}
|
||||
scales: getScaleOptions(data, true)
|
||||
}
|
||||
});
|
||||
}
|
||||
@@ -1465,12 +1597,7 @@ class WebInterface:
|
||||
options: {
|
||||
responsive: true,
|
||||
maintainAspectRatio: false,
|
||||
scales: {
|
||||
y: {
|
||||
beginAtZero: true,
|
||||
max: 5
|
||||
}
|
||||
}
|
||||
scales: getScaleOptions(data)
|
||||
}
|
||||
});
|
||||
}
|
||||
@@ -1535,7 +1662,7 @@ class WebInterface:
|
||||
<td>${genTime}</td>
|
||||
<td>${tokenInfo}${tokensPerSec}</td>
|
||||
<td>${test.status}</td>
|
||||
<td><small>${test.notes}</small></td>
|
||||
<td><small>${test.notes || ''}</small></td>
|
||||
</tr>
|
||||
`;
|
||||
});
|
||||
|
||||
@@ -78,6 +78,29 @@
|
||||
box-shadow: 0 4px 15px var(--shadow-hover);
|
||||
}
|
||||
|
||||
/* Scale toggle button: absolutely positioned, 30px from the top and 140px
   from the right edge of its positioned ancestor. */
.scale-toggle {
    /* Placement */
    position: absolute;
    right: 140px;
    top: 30px;

    /* Appearance */
    border: none;
    border-radius: 20px;
    background: var(--border-color);
    font-size: 1em;
    padding: 10px 20px;

    /* Interaction */
    cursor: pointer;
    transition: all 0.3s;
}

/* Hover: enlarge slightly and add a shadow; the transition above animates it. */
.scale-toggle:hover {
    box-shadow: 0 4px 15px var(--shadow-hover);
    transform: scale(1.05);
}

/* Applied while the "zoomed" class is present: gradient background, white text. */
.scale-toggle.zoomed {
    color: white;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
}
|
||||
|
||||
h1 {
|
||||
font-size: 2.5em;
|
||||
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
||||
@@ -389,8 +412,7 @@
|
||||
</head>
|
||||
<body>
|
||||
<div class="container">
|
||||
<header>
|
||||
<button class="theme-toggle" onclick="toggleTheme()">🌓 Toggle Dark Mode</button>
|
||||
<header> <button class="scale-toggle" id="scaleToggle" onclick="toggleScale()" title="Toggle between full scale (0-5) and zoomed view for better distinction">🔍 Full Scale</button> <button class="theme-toggle" onclick="toggleTheme()">🌓 Toggle Dark Mode</button>
|
||||
<h1>🧠 LLM Evaluation Dashboard</h1>
|
||||
<p class="subtitle">Comprehensive Intelligence & Performance Analysis</p>
|
||||
</header>
|
||||
@@ -485,6 +507,8 @@
|
||||
let statisticsData = null;
|
||||
let intelligenceData = null;
|
||||
let currentModelDetails = null;
|
||||
let zoomedScale = false;
|
||||
let overviewChartInstance = null;
|
||||
|
||||
// Theme toggle functionality
|
||||
function toggleTheme() {
|
||||
@@ -501,6 +525,81 @@
|
||||
}
|
||||
}
|
||||
|
||||
// Scale toggle functionality
|
||||
/**
 * Flip the chart scale mode between full scale and zoomed view.
 *
 * Side effects, in order:
 *  1. inverts the shared `zoomedScale` flag;
 *  2. updates the label and `zoomed` class of the `#scaleToggle` button
 *     (skipped when the button is not in the document);
 *  3. stores the choice in localStorage under `zoomedScale` as
 *     'enabled' / 'disabled' (a storage failure is logged, not thrown);
 *  4. calls refreshAllCharts() so the charts pick up the new mode.
 */
function toggleScale() {
    zoomedScale = !zoomedScale;

    // A missing button must not stop the charts from being refreshed.
    const btn = document.getElementById('scaleToggle');
    if (btn) {
        btn.textContent = zoomedScale ? '🔎 Zoomed' : '🔍 Full Scale';
        btn.classList.toggle('zoomed', zoomedScale);
    }

    // setItem can throw (storage disabled or quota exceeded); persisting the
    // preference is best-effort and must not block the redraw below.
    try {
        localStorage.setItem('zoomedScale', zoomedScale ? 'enabled' : 'disabled');
    } catch (error) {
        console.warn('Could not save scale preference:', error);
    }

    // Refresh all charts with new scale
    refreshAllCharts();
}
|
||||
|
||||
// Load scale preference
|
||||
/**
 * Restore the saved scale mode.
 *
 * Reads the `zoomedScale` key from localStorage. When the stored value is
 * 'enabled', sets the shared `zoomedScale` flag to true and puts the
 * `#scaleToggle` button into its zoomed appearance. Any other value (or an
 * unreadable storage) leaves the default full-scale mode untouched.
 */
function loadScalePreference() {
    let savedScale = null;
    // getItem can throw when storage access is blocked; fall back to the
    // default mode instead of propagating the error to the caller.
    try {
        savedScale = localStorage.getItem('zoomedScale');
    } catch (error) {
        console.warn('Could not read scale preference:', error);
    }

    if (savedScale !== 'enabled') {
        return;
    }

    zoomedScale = true;

    // The flag is still restored when the button is not in the document.
    const btn = document.getElementById('scaleToggle');
    if (btn) {
        btn.textContent = '🔎 Zoomed';
        btn.classList.add('zoomed');
    }
}
|
||||
|
||||
// Calculate optimal Y-axis range for zoomed view
|
||||
/**
 * Build the value for a chart's `scales` option from the plotted values.
 *
 * Reads the shared `zoomedScale` flag:
 *  - flag off, or no usable values: the fixed full scale (0 to 5);
 *  - flag on: a range fitted around the values, padded by 20% of their
 *    spread (at least 0.2) and kept inside 0..5.
 *
 * @param {Array<number|null|undefined>} data - Values shown on the axis.
 *     null, undefined and non-finite entries are ignored; entries outside
 *     0..5 are clamped so the computed range can never be inverted.
 *     A non-array argument is treated as "no data".
 * @param {boolean} [isRadar=false] - When true the options are keyed by
 *     `r` (radial axis) instead of `y`.
 * @returns {Object} `{ y: {...} }` or `{ r: {...} }` holding either
 *     `{ beginAtZero: true, max: 5 }` or `{ min, max, beginAtZero: false }`.
 */
function getScaleOptions(data, isRadar = false) {
    const SCORE_MIN = 0;
    const SCORE_MAX = 5;
    const axis = isRadar ? 'r' : 'y';
    const fullScale = { [axis]: { beginAtZero: true, max: SCORE_MAX } };

    if (!zoomedScale) {
        // Full scale: 0 to 5
        return fullScale;
    }

    // Keep only finite numbers (Infinity would otherwise produce an infinite
    // axis bound) and clamp them to the score range so min stays below max.
    const validData = (Array.isArray(data) ? data : [])
        .filter((d) => d !== null && d !== undefined)
        .map((d) => Number(d))
        .filter((d) => Number.isFinite(d))
        .map((d) => Math.min(SCORE_MAX, Math.max(SCORE_MIN, d)));

    if (validData.length === 0) {
        return fullScale;
    }

    const minVal = Math.min(...validData);
    const maxVal = Math.max(...validData);
    const padding = Math.max((maxVal - minVal) * 0.2, 0.2); // At least 0.2 padding

    // Snap the padded bounds outward to one decimal place.
    let min = Math.max(SCORE_MIN, Math.floor((minVal - padding) * 10) / 10);
    let max = Math.min(SCORE_MAX, Math.ceil((maxVal + padding) * 10) / 10);

    // Ensure we have at least some range
    if (max - min < 0.5) {
        min = Math.max(SCORE_MIN, minVal - 0.3);
        max = Math.min(SCORE_MAX, maxVal + 0.3);
    }

    return { [axis]: { min, max, beginAtZero: false } };
}
|
||||
|
||||
// Refresh all charts when scale changes
|
||||
/**
 * Redraw the charts after the scale mode has changed.
 *
 * Does nothing while `comparisonData` is still falsy; otherwise calls the
 * overview, comparison and category chart refreshers in that order.
 */
function refreshAllCharts() {
    if (!comparisonData) {
        return;
    }

    refreshOverviewChart();
    updateComparisonChart();
    updateCategoryChart();
}
|
||||
|
||||
// Tab switching
|
||||
function switchTab(tabName) {
|
||||
document.querySelectorAll('.tab').forEach(t => t.classList.remove('active'));
|
||||
@@ -513,6 +612,7 @@
|
||||
// Initialize dashboard
|
||||
async function initDashboard() {
|
||||
loadThemePreference();
|
||||
loadScalePreference();
|
||||
await loadOverview();
|
||||
await loadComparison();
|
||||
await loadStatistics();
|
||||
@@ -553,14 +653,31 @@
|
||||
document.getElementById('overviewStats').innerHTML = statsHtml;
|
||||
|
||||
// Create overview chart
|
||||
refreshOverviewChart();
|
||||
|
||||
} catch (error) {
|
||||
console.error('Error loading overview:', error);
|
||||
}
|
||||
}
|
||||
|
||||
function refreshOverviewChart() {
|
||||
if (!comparisonData) return;
|
||||
|
||||
const models = Object.keys(comparisonData.models);
|
||||
const data = models.map(m => comparisonData.models[m].overall_stats.average || 0);
|
||||
|
||||
if (overviewChartInstance) {
|
||||
overviewChartInstance.destroy();
|
||||
}
|
||||
|
||||
const ctx = document.getElementById('overviewChart').getContext('2d');
|
||||
new Chart(ctx, {
|
||||
overviewChartInstance = new Chart(ctx, {
|
||||
type: 'bar',
|
||||
data: {
|
||||
labels: models,
|
||||
datasets: [{
|
||||
label: 'Average Score',
|
||||
data: models.map(m => comparisonData.models[m].overall_stats.average || 0),
|
||||
data: data,
|
||||
backgroundColor: 'rgba(102, 126, 234, 0.6)',
|
||||
borderColor: 'rgba(102, 126, 234, 1)',
|
||||
borderWidth: 2
|
||||
@@ -569,12 +686,7 @@
|
||||
options: {
|
||||
responsive: true,
|
||||
maintainAspectRatio: false,
|
||||
scales: {
|
||||
y: {
|
||||
beginAtZero: true,
|
||||
max: 5
|
||||
}
|
||||
}
|
||||
scales: getScaleOptions(data)
|
||||
}
|
||||
});
|
||||
|
||||
@@ -630,11 +742,7 @@
|
||||
options: {
|
||||
responsive: true,
|
||||
maintainAspectRatio: false,
|
||||
scales: {
|
||||
r: {
|
||||
beginAtZero: true
|
||||
}
|
||||
}
|
||||
scales: getScaleOptions(data, true)
|
||||
}
|
||||
});
|
||||
}
|
||||
@@ -855,12 +963,7 @@
|
||||
options: {
|
||||
responsive: true,
|
||||
maintainAspectRatio: false,
|
||||
scales: {
|
||||
y: {
|
||||
beginAtZero: true,
|
||||
max: 5
|
||||
}
|
||||
}
|
||||
scales: getScaleOptions(data)
|
||||
}
|
||||
});
|
||||
}
|
||||
@@ -925,7 +1028,7 @@
|
||||
<td>${genTime}</td>
|
||||
<td>${tokenInfo}${tokensPerSec}</td>
|
||||
<td>${test.status}</td>
|
||||
<td><small>${test.notes}</small></td>
|
||||
<td><small>${test.notes || ''}</small></td>
|
||||
</tr>
|
||||
`;
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user