# inspired by https://huggingface.co/spaces/ought/raft-leaderboard
"""Streamlit app that renders the MTEB leaderboard.

Submissions are Hugging Face Hub datasets tagged ``benchmark:mteb``. Each one
is expected to ship CSV files whose rows carry ``model``, ``dataset`` and
``value`` fields; the app pivots them into one leaderboard row per model.
"""
import numpy as np
import pandas as pd
import requests
import streamlit as st
from tasks import TASKS
from huggingface_hub import HfApi
import datasets
import os

FORMATTED_TASK_NAMES = TASKS

# Leading metadata columns of the leaderboard, in display order.
METADATA_COLUMNS = ["Submitter", "Submission Name", "Submission Date"]

api = HfApi()


def download_submissions():
    """Return the Hub datasets tagged as MTEB submissions.

    The access token is read from the ``HF_HUB_TOKEN`` environment variable;
    when it is unset, ``None`` is passed to the Hub client.
    """
    return api.list_datasets(
        filter="benchmark:mteb",
        use_auth_token=os.getenv("HF_HUB_TOKEN"),
    )


def _collect_rows(submissions, task_names):
    """Build one record (dict) per model found in the submissions' CSV files.

    A new record starts whenever the ``model`` field changes between
    consecutive recognised lines of the same submission. Only lines whose
    ``dataset`` is in ``task_names`` are used; a task reported twice for the
    same model keeps the last value. Tasks a model does not report are simply
    absent from its record, so they surface as NaN in the final table instead
    of shifting scores into another model's row.
    """
    rows = []
    for submission in submissions:
        # Keep the try body minimal: only the load is expected to fail this way.
        try:
            splits = datasets.load_dataset(
                submission.id, streaming=True, data_files="*csv"
            )
        except FileNotFoundError:
            print("FileNotFoundError")
            continue
        data = list(splits.values())[0]

        # Reset per submission so that two submissions reusing the same model
        # name never get merged into a single row.
        current = None
        for line in data:
            task = line["dataset"]
            if task not in task_names:
                continue
            if current is None or line["model"] != current["Submission Name"]:
                current = {
                    "Submitter": submission.author,
                    "Submission Name": line["model"],
                    "Submission Date": pd.to_datetime(
                        submission.lastModified
                    ).strftime("%b %d, %Y"),
                }
                rows.append(current)
            current[task] = line["value"]
    return rows


@st.cache
def format_submissions(submissions):
    """Turn raw Hub submissions into the ranked leaderboard DataFrame.

    Args:
        submissions: Iterable of Hub dataset-info objects exposing ``id``,
            ``author`` and ``lastModified`` (as returned by
            ``download_submissions``).

    Returns:
        A DataFrame with columns ``Rank``, ``Submitter``, ``Submission Name``,
        ``Submission Date``, ``Overall`` and one column per task, sorted by
        ``Overall`` in descending order. ``Overall`` is the row-wise mean of
        the task columns; pandas skips NaN there, so it averages only the
        tasks a model actually reported.
    """
    # NOTE(review): st.cache is deprecated in recent Streamlit releases
    # (replaced by st.cache_data); kept here to match the pinned version.
    task_names = list(TASKS)
    rows = _collect_rows(submissions, set(task_names))

    # Explicit columns keep the layout stable even when there are no rows or
    # when no model reports a given task.
    df = pd.DataFrame(rows, columns=METADATA_COLUMNS + task_names)
    df.insert(3, "Overall", df[task_names].mean(axis=1))
    df = df.sort_values("Overall", ascending=False)
    df.rename(columns=dict(zip(TASKS, FORMATTED_TASK_NAMES)), inplace=True)
    # Start ranking from 1
    df.insert(0, "Rank", np.arange(1, len(df) + 1))
    return df


###########
### APP ###
###########

st.set_page_config(layout="wide")

st.title("MTEB: Massive Text Embedding Benchmark")

st.markdown(
    """
To submit to MTEB, please follow the following instructions:

- Publish your .csv MTEB scores to a public Hugging Face Hub Dataset.
  The .csv files must be at the root of the repo.
- Add the following to the top of your model card:

```
---
benchmark: mteb
type: evaluation
---
```

That's all! [Here's an example](https://huggingface.co/datasets/mteb/mteb-example-submission/tree/main) of how your repo should look like. You should now be able to see your results in the leaderboard below.
"""
)

submissions = download_submissions()
df = format_submissions(submissions)
# NOTE(review): Styler.set_precision was deprecated in pandas 1.3 in favour of
# Styler.format(precision=...); switch once the pinned pandas version allows.
styler = df.style.set_precision(3).set_properties(
    **{"white-space": "pre-wrap", "text-align": "center"}
)

# hack to remove index column: https://discuss.streamlit.io/t/questions-on-st-table/6878/3
# NOTE(review): the injected markup below is empty, so this call currently
# hides nothing; presumably a <style> rule was lost. Confirm and restore it.
st.markdown(
    """
""",
    unsafe_allow_html=True,
)
st.table(styler)