File size: 6,297 Bytes
b3d493a 3364f2f 0070ea2 02764ad acea508 3364f2f b3d493a 4bccbd5 b3d493a acea508 b3d493a a0f1951 3364f2f acea508 b3d493a fad338b b35cd1f 0c4c053 b3d493a 0c4c053 acea508 b3d493a acea508 b3d493a acea508 b3d493a acea508 dd2da1a 0c4c053 acea508 0c4c053 acea508 dd2da1a acea508 0c4c053 acea508 3364f2f b35cd1f 3364f2f b35cd1f e17557b 02764ad b3d493a a0f1951 02764ad af3ccf3 55541c0 b3d493a 0c4c053 acea508 0c4c053 acea508 0c4c053 b35cd1f 0c4c053 02764ad 99a545d fad338b 3364f2f d94b1a3 02764ad b3d493a 02764ad acea508 3364f2f acea508 3364f2f 02764ad b3d493a 0c4c053 acea508 0c4c053 a0f1951 3364f2f a0f1951 1f689a2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 |
import React, { useState, useEffect } from 'react';
import { chain } from 'lodash';
import './App.css';
const ScoreBar = ({ score }) => {
if (score === undefined || score === null) return null;
const percentage = score <= 1 ? score * 100 : score;
const hue = Math.min(percentage * 1.2, 120); // Maps 0-100% to 0-120 (red to green)
const backgroundColor = `hsl(${hue}, 80%, 50%)`;
return (
<div className="score-bar">
<div
className="score-fill"
style={{
width: `${percentage}%`,
backgroundColor
}}
/>
<span className="score-text">
{percentage.toFixed(1)}%
</span>
</div>
);
};
const App = () => {
const [allData, setAllData] = useState([]);
const [loading, setLoading] = useState(true);
const [error, setError] = useState(null);
const [sortConfig, setSortConfig] = useState({ key: 'Average', direction: 'desc' });
const [searchQuery, setSearchQuery] = useState('');
const [showVanilla, setShowVanilla] = useState(true);
const [showToolCalling, setShowToolCalling] = useState(false);
useEffect(() => {
const fetchData = async () => {
try {
setLoading(true);
// Fetch all data from API
const response = await fetch('https://smolagents-smolagents-llm-leaderboard.hf.space/api/results');
if (!response.ok) {
throw new Error(`HTTP error! status: ${response.status}`);
}
const jsonData = await response.json();
setAllData(jsonData);
} catch (err) {
console.error('Error fetching data:', err);
setError(err.message);
} finally {
setLoading(false);
}
};
fetchData();
}, []);
const handleSort = (key) => {
const direction = sortConfig.key === key && sortConfig.direction === 'desc' ? 'asc' : 'desc';
setSortConfig({ key, direction });
};
// Filter data based on selected action type
const getFilteredData = () => {
const actionType = showToolCalling ? 'tool-calling' : 'code';
return allData.filter(item => item.agent_action_type === actionType);
};
// Get vanilla score for a model
const getVanillaScore = (modelId, metric) => {
const vanillaEntry = allData.find(item =>
item.model_id === modelId && item.agent_action_type === 'vanilla'
);
return vanillaEntry?.scores[metric];
};
const filteredAndSortedData = chain(getFilteredData())
.filter(item => item.model_id.toLowerCase().includes(searchQuery.toLowerCase()))
.orderBy(
[item => {
if (sortConfig.key === 'model') {
return item.model_id;
}
return item.scores[sortConfig.key] || 0;
}],
[sortConfig.direction]
)
.value();
if (loading) return <div className="container">Loading benchmark results...</div>;
if (error) return <div className="container" style={{color: 'red'}}>Error: {error}</div>;
return (
<div className="container">
<div className="header">
<h1 className="title">Smolagents Leaderboard</h1>
<p className="subtitle">How do different LLMs compare for powering agents?</p>
<p className="subtitle">Uses <a target="_blank" href="https://github.com/huggingface/smolagents">smolagents</a> with <a target="_blank" href="https://huggingface.co/datasets/smolagents/benchmark-v1">smolagents benchmark</a>.</p>
</div>
<div className="search-container">
<div className="search-with-options">
<input
type="text"
className="search-input"
placeholder="Search models..."
value={searchQuery}
onChange={(e) => setSearchQuery(e.target.value)}
/>
<div className="options-container">
<label className="option-label">
<input
type="checkbox"
checked={showVanilla}
onChange={() => setShowVanilla(!showVanilla)}
/>
Show Vanilla Scores
</label>
<label className="option-label">
<input
type="checkbox"
checked={showToolCalling}
onChange={() => setShowToolCalling(!showToolCalling)}
/>
Show Tool-Calling Scores
</label>
</div>
</div>
</div>
<div className="table-container">
<table>
<thead>
<tr>
<th onClick={() => handleSort('model')}>
Model {sortConfig.key === 'model' && (
sortConfig.direction === 'desc' ? 'β' : 'β'
)}
</th>
{["Average", "GAIA", "MATH", "SimpleQA"].map(benchmark => (
<th key={benchmark} onClick={() => handleSort(benchmark)}>
{benchmark} {sortConfig.key === benchmark && (
sortConfig.direction === 'desc' ? 'β' : 'β'
)}
</th>
))}
</tr>
</thead>
<tbody>
{filteredAndSortedData.map((item, index) => (
<tr key={index}>
<td className="model-cell">
<div className="model-name">{item.model_id}</div>
{showVanilla && (
<div className="vanilla-name">
{`vanilla: ${getVanillaScore(item.model_id, 'Average')?.toFixed(1) || 'N/A'}%`}
</div>
)}
</td>
{["Average", "GAIA", "MATH", "SimpleQA"].map(metric => (
<td key={metric}>
<ScoreBar score={item.scores[metric]} />
{showVanilla && getVanillaScore(item.model_id, metric) !== undefined && (
<ScoreBar score={getVanillaScore(item.model_id, metric)} />
)}
</td>
))}
</tr>
))}
</tbody>
</table>
</div>
<div className="legend">
<p><strong>Agent type:</strong> {showToolCalling ? 'Tool-Calling' : 'Code'}{showVanilla ? ' (with Vanilla comparison)' : ''}</p>
</div>
</div>
);
};
export default App; |