|
import os |
|
import gistyc |
|
import requests |
|
from dataclasses import dataclass |
|
import re |
|
import streamlit as st |
|
|
|
@dataclass
class GistInfo:
    """Record describing one benchmark-result gist and the scores read from it.

    The string fields identify the gist and the model it reports on; the
    float fields hold the individual benchmark scores and their average.
    """

    # Gist identification.
    gist_id: str
    filename: str
    url: str

    # Model identification.
    model_name: str
    model_id: str
    model: str

    # Benchmark scores.
    agieval: float
    gpt4all: float
    truthfulqa: float
    bigbench: float
    average: float
|
|
|
|
|
def update_gist(content, gist_id, access_token,
                filename="YALL - Yet Another LLM Leaderboard.md", timeout=30):
    """
    Update the content of a GitHub Gist.

    Sends a PATCH request to the GitHub Gists API that replaces the content
    of one file in the gist, then prints whether the update succeeded.
    Failures (transport errors or a non-200 status) are reported on stdout
    rather than raised.

    Args:
        content (str): The new content of the gist.
        gist_id (str): The ID of the gist to update.
        access_token (str): GitHub personal access token with gist permissions.
        filename (str): Name of the file inside the gist to overwrite.
            Defaults to the leaderboard file.
        timeout (float): Seconds to wait for GitHub before giving up.
    """
    api_url = f"https://api.github.com/gists/{gist_id}"
    headers = {
        "Authorization": f"token {access_token}",
        "Accept": "application/vnd.github.v3+json",
    }
    data = {"files": {filename: {"content": content}}}

    try:
        # Without a timeout, requests waits indefinitely on a stalled connection.
        response = requests.patch(api_url, json=data, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        print("Failed to update gist. Request error:", exc)
        return

    if response.status_code == 200:
        print("Gist updated successfully.")
        return

    print("Failed to update gist. Status code:", response.status_code)
    try:
        print("Response:", response.json())
    except ValueError:
        # The error body is not JSON; show the raw text instead of crashing
        # while reporting a failure.
        print("Response:", response.text)
|
|
|
|
|
# Markdown link to a Hugging Face model: [name](https://huggingface.co/org/repo).
_MODEL_LINK_RE = re.compile(r'\[([^\]]+)\]\(https://huggingface.co/([^/]+)/([^)]+)\)')


def _parse_score_row(row):
    """Split a markdown table row into (model_cell, [4 float scores]); None if malformed."""
    cells = [cell.strip() for cell in row.split('|')[1:-1]]
    if len(cells) < 5:
        return None
    try:
        scores = [float(cell) for cell in cells[1:5]]
    except ValueError:
        return None
    return cells[0], scores


@st.cache_data
def create_yall():
    """
    Build the leaderboard as a markdown table and publish it to a gist.

    Lists the gists available to the token stored in the ``github``
    environment variable, keeps those whose first file name ends in
    ``-Nous.md``, reads the scores from the third line of each file, sorts
    the entries by average (highest first), renders them as a markdown
    table, and pushes that table to the leaderboard gist via ``update_gist``.

    Gists that cannot be fetched, or whose score row is missing or
    malformed, are skipped.

    Returns:
        str: The generated markdown table.
    """
    GITHUB_API_TOKEN = os.environ.get("github")

    gist_api = gistyc.GISTyc(auth_token=GITHUB_API_TOKEN)
    data = gist_api.get_gists()

    gist_infos = []
    for data_dict in data:
        if 'files' not in data_dict or not data_dict['files']:
            continue

        # Only the first file of each gist is inspected.
        file_info = next(iter(data_dict['files'].values()))
        filename = file_info['filename']
        if not filename.endswith("-Nous.md"):
            continue

        try:
            response = requests.get(file_info['raw_url'], timeout=30)
        except requests.RequestException:
            # One unreachable gist should not abort the whole leaderboard.
            continue
        if response.status_code != 200 or "Error: File does not exist" in response.text:
            continue

        lines = response.text.split('\n')
        if len(lines) < 3:
            continue

        # The third line is taken as the data row of the result table
        # (presumably after a header and a separator line - confirm against
        # the gist format).
        parsed = _parse_score_row(lines[2])
        if parsed is None:
            continue
        model_cell, (agieval, gpt4all, truthfulqa, bigbench) = parsed

        model_match = _MODEL_LINK_RE.search(model_cell)
        if model_match:
            model_name = model_match.group(1)
            model_id = f"{model_match.group(2)}/{model_match.group(3)}"
            # Debug trace of each successfully parsed model.
            print(model_cell)
            print(model_name)
            print(model_id)
            print("=============")
        else:
            model_name = model_id = 'Unknown'

        # Mean of AGIEval, GPT4All and Bigbench; TruthfulQA is left out, as in
        # the original expression (columns 1, 2 and 4 divided by 3).
        # NOTE(review): confirm that excluding TruthfulQA is intended.
        # Rounded to two decimals so the rendered table stays readable.
        average = round((agieval + gpt4all + bigbench) / 3, 2)

        gist_infos.append(
            GistInfo(
                gist_id=data_dict['id'],
                filename=filename,
                url=data_dict['html_url'],
                model_name=model_name,
                model_id=model_id,
                model=model_cell,
                agieval=agieval,
                gpt4all=gpt4all,
                truthfulqa=truthfulqa,
                bigbench=bigbench,
                average=average,
            )
        )

    gist_infos = sorted(gist_infos, key=lambda x: x.average, reverse=True)

    header = (
        "| Model | Average | AGIEval | GPT4All | TruthfulQA | Bigbench |\n"
        "|---|---:|---:|---:|---:|---:|\n"
    )
    rows = [
        f"| [{gist.model_id}](https://huggingface.co/{gist.model_id}) [π]({gist.url}) "
        f"| {gist.average} | {gist.agieval} | {gist.gpt4all} "
        f"| {gist.truthfulqa} | {gist.bigbench} |\n"
        for gist in gist_infos
    ]
    markdown_table = header + "".join(rows)

    update_gist(content=markdown_table, gist_id="90294929a2dbcb8877f9696f28105fdf", access_token=GITHUB_API_TOKEN)

    return markdown_table