Created
May 13, 2025 09:18
-
-
Save angelsen/507ef820bdde2fa73a730b744c18f1a8 to your computer and use it in GitHub Desktop.
Enhances Berkeley Function Calling Leaderboard with parameter counts and efficiency metrics for identifying the most cost-effective AI models.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // ==UserScript== | |
| // @name BFCL Parameter Analysis | |
| // @namespace http://tampermonkey.net/ | |
| // @version 2025-05-13 | |
| // @description Extract parameter counts and add efficiency metrics to Berkeley Function Calling Leaderboard | |
| // @author Fredrik Angelsen | |
| // @match https://gorilla.cs.berkeley.edu/leaderboard.html | |
| // @icon https://www.google.com/s2/favicons?sz=64&domain=berkeley.edu | |
| // @grant none | |
| // ==/UserScript== | |
| (function() { | |
| 'use strict'; | |
// Poll once per second until the leaderboard table has at least one body row,
// then stop polling and add the extra columns.
const checkForTable = setInterval(function pollForLeaderboardRows() {
  const leaderboardTable = document.getElementById('leaderboard-table');
  if (!leaderboardTable || !leaderboardTable.querySelector('tbody tr')) {
    return; // Table not rendered yet; try again on the next tick.
  }
  clearInterval(checkForTable);
  enhanceLeaderboard();
}, 1000);
/**
 * Adds a "Params (B)" and an "Acc/Param" column to the leaderboard table.
 *
 * Two sortable header cells are appended to the last row of
 * `#leaderboard-head`. For every body row of `#leaderboard-table`, the
 * accuracy is read from the 2nd cell and the model name from the 3rd cell,
 * and two cells are appended: the parameter count parsed from the model name
 * and the accuracy divided by that count. A short explanation paragraph is
 * appended below the table when its container exists.
 *
 * Rows whose parameter count or accuracy cannot be determined still receive
 * both cells ("Unknown" / "N/A") so every row keeps the same number of
 * columns and the trailing-cell lookups used for sorting stay valid.
 *
 * Does nothing when the header row cannot be found.
 */
function enhanceLeaderboard() {
  // Builds a clickable header cell whose trailing empty <span> holds the sort indicator.
  const createSortableHeader = (label, column) => {
    const header = document.createElement('th');
    header.className = 'column-header';
    header.textContent = label;
    header.id = `${column}-col`;
    header.addEventListener('click', () => sortTableByColumn(column));
    header.appendChild(document.createElement('span'));
    return header;
  };

  const headerRow = document.querySelector('#leaderboard-head tr:last-child');
  if (!headerRow) {
    return; // Unexpected page structure; leave the table untouched.
  }
  headerRow.appendChild(createSortableHeader('Params (B)', 'param'));
  headerRow.appendChild(createSortableHeader('Acc/Param', 'efficiency'));

  document.querySelectorAll('#leaderboard-table tbody tr').forEach((row) => {
    const accuracyCell = row.querySelector('td:nth-child(2)');
    const modelCell = row.querySelector('td:nth-child(3)');

    const params = modelCell ? extractParameterCount(modelCell.textContent) : 0;
    const accuracy = accuracyCell
      ? Number.parseFloat(accuracyCell.textContent)
      : Number.NaN;

    // Efficiency is accuracy per billion parameters; it is only meaningful
    // when both inputs are usable numbers.
    const hasEfficiency = params > 0 && Number.isFinite(accuracy);
    const efficiency = hasEfficiency ? (accuracy / params).toFixed(2) : 'N/A';

    const paramCell = document.createElement('td');
    paramCell.textContent = params > 0 ? params : 'Unknown';
    paramCell.dataset.value = params;
    row.appendChild(paramCell);

    const efficiencyCell = document.createElement('td');
    efficiencyCell.textContent = efficiency;
    // -1 marks "no value" so these rows sort below every real efficiency.
    efficiencyCell.dataset.value = hasEfficiency ? efficiency : -1;
    row.appendChild(efficiencyCell);
  });

  // Add explanation of the new metrics (static markup, no page data interpolated).
  const explanation = document.createElement('p');
  explanation.innerHTML = '<b>Params (B)</b> = Parameter count in billions. <b>Acc/Param</b> = Accuracy per billion parameters (higher is better).';
  const leaderboardDiv = document.getElementById('leaderboard');
  const container = leaderboardDiv ? leaderboardDiv.querySelector('.col-md-12') : null;
  if (container) {
    container.appendChild(explanation);
  }
}
/**
 * Extracts a model's parameter count, in billions, from its display name.
 *
 * Looks for the first number (integer or decimal) immediately followed by a
 * "b"/"B" that is not itself followed by another letter, e.g. "8B" in
 * "Llama-3.1-8B-Instruct" or "70b" in "xLAM-2-70b-fc-r". Requiring that the
 * "b" is not followed by a letter avoids reading fragments such as "4bit" or
 * "v2beta" as a size.
 *
 * Only the matched number is returned; multipliers such as the "8x" in
 * "8x22B" are not applied.
 *
 * @param {string} modelName - Model name as shown in the leaderboard.
 * @returns {number} Parameter count in billions, or 0 when no size is found
 *   or `modelName` is not a string.
 */
function extractParameterCount(modelName) {
  if (typeof modelName !== 'string') {
    return 0;
  }
  // A single case-insensitive pattern covers every "-32b-", "70B", "3.5b-"
  // style variant; surrounding dashes do not need to be matched separately.
  const match = /(\d+(?:\.\d+)?)b(?![a-z])/i.exec(modelName);
  return match ? Number.parseFloat(match[1]) : 0; // 0 means unknown size
}
/**
 * Sorts the leaderboard body rows by one of the two appended columns and
 * toggles that column's sort indicator.
 *
 * The sort key is read from the `data-value` attribute of the second-to-last
 * cell ('param') or the last cell ('efficiency') of each row. The first click
 * on a header shows ' 🔼' and orders rows largest-first; clicking the same
 * header again shows ' 🔽' and orders rows smallest-first. Every indicator
 * span inside `#leaderboard-head th` is cleared before the active one is set.
 *
 * Rows whose value is missing or not a number use the column's fallback
 * (0 for 'param', -1 for 'efficiency'). A genuine 0 is kept as 0.
 *
 * Does nothing when `column` is not recognised or when the table body or the
 * column's header/indicator span cannot be found.
 *
 * @param {string} column - Either 'param' or 'efficiency'.
 */
function sortTableByColumn(column) {
  let cellSelector;
  let fallbackValue;
  if (column === 'param') {
    cellSelector = 'td:nth-last-child(2)';
    fallbackValue = 0;
  } else if (column === 'efficiency') {
    cellSelector = 'td:last-child';
    fallbackValue = -1;
  } else {
    return;
  }

  const table = document.getElementById('leaderboard-table');
  const tbody = table ? table.querySelector('tbody') : null;
  const header = document.getElementById(`${column}-col`);
  const sortSpan = header ? header.querySelector('span') : null;
  if (!tbody || !sortSpan) {
    return;
  }

  // The indicator currently shown decides the direction of this click.
  const isAscending = sortSpan.textContent === ' 🔼';

  // Update sorting indicators: clear all, then mark the active column.
  document.querySelectorAll('#leaderboard-head th span').forEach((span) => {
    span.textContent = '';
  });
  sortSpan.textContent = isAscending ? ' 🔽' : ' 🔼';

  const readSortValue = (row) => {
    const cell = row.querySelector(cellSelector);
    const parsed = cell ? Number.parseFloat(cell.dataset.value) : Number.NaN;
    // Only a missing/unparseable value falls back; a real 0 must stay 0.
    return Number.isNaN(parsed) ? fallbackValue : parsed;
  };

  // Parse each row's value once instead of on every comparison.
  const keyedRows = Array.from(tbody.querySelectorAll('tr'), (row) => ({
    row,
    value: readSortValue(row),
  }));
  keyedRows.sort((a, b) => (isAscending ? a.value - b.value : b.value - a.value));

  // Re-appending an existing row moves it, so this reorders the table in place.
  keyedRows.forEach(({ row }) => tbody.appendChild(row));
}
| })(); |
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
To use this script: