"""Gradio application that displays the ViDoRe leaderboard and submission instructions."""

import gradio as gr

from app.utils import add_rank_and_format, filter_models, get_refresh_function
from data.model_handler import ModelHandler

METRICS = ["ndcg_at_5", "recall_at_1"]

# The leaderboard frame is treated as "three leading anchor columns, then one
# column per dataset". The anchors are always displayed; the second one is
# rendered as markdown (see the `datatype` list built for the Dataframe).
_NUM_ANCHOR_COLUMNS = 3

# Custom stylesheet handed to `gr.Blocks`.
_CSS = """
table > thead {
    white-space: normal
}

table {
    --cell-width-1: 250px
}

table > tbody > tr > td:nth-child(2) > div {
    overflow-x: auto
}

.filter-checkbox-group {
    max-width: max-content;
}

#markdown size
.markdown {
    font-size: 1rem;
}
"""

_INTRO_MD = """
Visual Document Retrieval Benchmark leaderboard. To submit results, refer to the corresponding tab.
Refer to the [ColPali paper](https://arxiv.org/abs/2407.01449) for details on metrics, tasks and models.
"""

# Raw string, as in the original code; it is appended to the statistics list.
_CITATION_MD = r"""
Please consider citing:

```bibtex
@misc{faysse2024colpaliefficientdocumentretrieval,
  title={ColPali: Efficient Document Retrieval with Vision Language Models},
  author={Manuel Faysse and Hugues Sibille and Tony Wu and Bilel Omrani and Gautier Viaud and Céline Hudelot and Pierre Colombo},
  year={2024},
  eprint={2407.01449},
  archivePrefix={arXiv},
  primaryClass={cs.IR},
  url={https://arxiv.org/abs/2407.01449},
}
```
"""

_SUBMISSION_MD = """
To submit a new model to the ViDoRe leaderboard, follow these steps:

1. **Evaluate your model**:
    - Follow the evaluation script provided in the [ViDoRe GitHub repository](https://github.com/illuin-tech/vidore-benchmark/)

2. **Format your submission file**:
    - The submission file should automatically be generated, and named `results.json` with the following structure:
    ```json
    {
        "dataset_name_1": {
            "metric_1": score_1,
            "metric_2": score_2,
            ...
        },
        "dataset_name_2": {
            "metric_1": score_1,
            "metric_2": score_2,
            ...
        },
    }
    ```
    - The dataset names should be the same as the ViDoRe dataset names listed in the following collection: [ViDoRe Benchmark](https://huggingface.co/collections/vidore/vidore-benchmark-667173f98e70a1c0fa4db00d).

3. **Submit your model**:
    - Create a public HuggingFace model repository with your model.
    - Add the tag `vidore` to your model in the metadata of the model card and place the `results.json` file at the root.

And you're done! Your model will appear on the leaderboard when you click refresh! Once the space gets rebooted, it will appear on startup.
"""


def _build_leaderboard_tab(model_handler, data, initial_metric):
    """Create the leaderboard widgets inside the currently open Gradio context.

    Args:
        model_handler: Object exposing ``get_vidore_data(metric)``; it is
            queried again every time the table is re-filtered.
        data: Already formatted frame shown when the page first loads. Its
            first ``_NUM_ANCHOR_COLUMNS`` columns are the anchors, the rest
            are offered as toggleable dataset columns.
        initial_metric: Metric pre-selected in the dropdown.
    """
    anchor_columns = list(data.columns[:_NUM_ANCHOR_COLUMNS])
    datasets_columns = list(data.columns[_NUM_ANCHOR_COLUMNS:])

    num_datasets = len(datasets_columns)
    num_models = len(data)
    num_scores = num_models * num_datasets

    gr.Markdown("# ViDoRe: The Visual Document Retrieval Benchmark 📚🔍")
    gr.Markdown("### From the paper - ColPali: Efficient Document Retrieval with Vision Language Models 👀")
    gr.Markdown(_INTRO_MD)

    with gr.Row():
        metric_dropdown = gr.Dropdown(choices=METRICS, value=initial_metric, label="Select Metric")
        research_textbox = gr.Textbox(
            placeholder="🔍 Search Models... [press enter]",
            label="Filter Models by Name",
        )
        # Only genuine choices are pre-selected. The anchor columns are not
        # selectable, so they are re-attached in `update_data` instead of
        # being smuggled through the checkbox value.
        # NOTE(review): recent Gradio releases appear to reject submitted
        # values that are not in `choices` - confirm against the pinned version.
        column_checkboxes = gr.CheckboxGroup(
            choices=datasets_columns,
            value=datasets_columns,
            label="Select Columns to Display",
        )

    with gr.Row():
        # Positional column types: number, markdown, then numbers for the
        # third anchor column and every dataset column.
        datatype = ["number", "markdown"] + ["number"] * (num_datasets + 1)
        dataframe = gr.Dataframe(data, datatype=datatype, type="pandas")

    def update_data(metric, search_term, selected_columns):
        """Return the table for ``metric``, filtered by name and column choice.

        The anchor columns always come first so the positional ``datatype``
        list above stays aligned; the selected dataset columns follow in the
        order they were received.
        """
        frame = model_handler.get_vidore_data(metric)
        frame = add_rank_and_format(frame)
        frame = filter_models(frame, search_term)
        chosen = [column for column in (selected_columns or []) if column not in anchor_columns]
        return frame[anchor_columns + chosen]

    with gr.Row():
        refresh_button = gr.Button("Refresh")
        refresh_button.click(
            get_refresh_function(),
            inputs=[metric_dropdown],
            outputs=dataframe,
            concurrency_limit=20,
        )

    # Automatically refresh the dataframe when the dropdown value changes.
    # NOTE(review): the refresh callback only receives the metric, so the
    # search box and column selection are not passed to it.
    metric_dropdown.change(get_refresh_function(), inputs=[metric_dropdown], outputs=dataframe)

    filter_inputs = [metric_dropdown, research_textbox, column_checkboxes]
    research_textbox.submit(update_data, inputs=filter_inputs, outputs=dataframe)
    column_checkboxes.change(update_data, inputs=filter_inputs, outputs=dataframe)

    # Built from single-line pieces (not an indented triple-quoted literal) so
    # that concatenating with the column-0 citation text keeps the list flush left.
    stats_md = (
        f"\n- **Total Datasets**: {num_datasets}"
        f"\n- **Total Scores**: {num_scores}"
        f"\n- **Total Models**: {num_models}\n"
    )
    gr.Markdown(stats_md + _CITATION_MD)


def _build_submission_tab():
    """Create the static "how to submit" page inside the currently open Gradio context."""
    gr.Markdown("# How to Submit a New Model to the Leaderboard")
    gr.Markdown(_SUBMISSION_MD)


def main() -> None:
    """Load the initial leaderboard data, assemble the two-tab UI and launch it."""
    model_handler = ModelHandler()
    initial_metric = "ndcg_at_5"

    data = model_handler.get_vidore_data(initial_metric)
    data = add_rank_and_format(data)

    with gr.Blocks(css=_CSS) as block:
        with gr.Tabs():
            with gr.TabItem("🏆 Leaderboard"):
                _build_leaderboard_tab(model_handler, data, initial_metric)
            with gr.TabItem("📚 Submit your model"):
                _build_submission_tab()

    block.queue(max_size=10).launch(debug=True)


if __name__ == "__main__":
    main()