import logging
import os
import re

import pandas as pd  # type: ignore[import]
from datasets import get_dataset_config_names, load_dataset  # type: ignore[import]

from .formatting import model_hyperlink
from .leaderboard_formatting import (
    COLUMNS_PRETTY,
    METRICS_PER_TASK,
    SORT_COLUMN_PER_TASK,
    get_columns_per_task,
)
from .tasks_content import TASKS_PRETTY_REVERSE
from .utils import MD_LINK_PATTERN

# Discover which benchmark tasks exist as configs of the results dataset.
# `get_dataset_config_names` raises FileNotFoundError when the dataset id is
# unknown or the auth token is rejected, so fall back to an empty task list
# and let the UI serve leaderboard stubs instead of crashing at import time.
try:
    AVAILABLE_TASKS = get_dataset_config_names(os.environ["DATASET_ID"])
except FileNotFoundError:
    AVAILABLE_TASKS = []
    logging.warning("Dataset is not available! Check if token is expired.")

AVAILABLE_TASKS_STR = "  ;  ".join(AVAILABLE_TASKS)
# Lazy %-style args keep string formatting out of the logging call itself.
logging.warning("Available tasks: %s", AVAILABLE_TASKS_STR)


def _get_results_stub() -> pd.DataFrame:
    stub_df = pd.DataFrame(
        [
            {
                "Model Name": "GPT-4",
                "Availability": "Proprietary",
                "Context Size": "16k",
                "BLEU": "X",
                "ROUGE": "X",
                "ChrF": "X",
                "BERTScore": "X",
                "BERTScore (Normalized)": "X",
                "Submitted By": "🏟 Long Code Arena Team",
                "Resources": "",
            },
            {
                "Model Name": "CodeLlama-7b (instruct)",
                "Availability": "Llama 2 license",
                "Context Size": "16k",
                "BLEU": "X",
                "ROUGE": "X",
                "ChrF": "X",
                "BERTScore": "X",
                "BERTScore (Normalized)": "X",
                "Submitted By": "🏟 Long Code Arena Team",
                "Resources": "",
            },
        ]
    )
    return stub_df


def _process_urls(raw_urls: str) -> str:
    """Convert a comma-separated list of markdown links into HTML hyperlinks.

    Each entry in ``raw_urls`` is expected to match ``MD_LINK_PATTERN``
    (``[text](url)``). Entries that do not match are kept verbatim instead of
    raising ``AttributeError`` (the previous behavior when ``re.search``
    returned ``None``). Empty / falsy input is returned unchanged.
    """
    if not raw_urls:
        return raw_urls
    html_urls = []
    for url in raw_urls.split(","):
        stripped = url.strip()
        match = re.search(MD_LINK_PATTERN, stripped)
        # Guard: a malformed entry previously crashed the whole leaderboard.
        html_urls.append(model_hyperlink(*match.groups()) if match else stripped)
    return ", ".join(html_urls)

def _extract_dataset_name(raw_urls: str) -> str:
    """Extract the link text of each markdown link, tagged with ``' context'``.

    ``raw_urls`` is a comma-separated list of markdown links; the display text
    (group 1 of ``MD_LINK_PATTERN``) of each becomes ``"<text> context"``.
    Entries that do not match the pattern fall back to their raw text instead
    of raising ``AttributeError``. Empty / falsy input is returned unchanged.
    """
    if not raw_urls:
        return raw_urls
    names = []
    for url in raw_urls.split(","):
        stripped = url.strip()
        match = re.search(MD_LINK_PATTERN, stripped)
        # Guard: malformed entries previously crashed with AttributeError.
        label = match.group(1) if match else stripped
        names.append(label + ' context')
    return ", ".join(names)


def _get_results_dataset(task_id: str) -> pd.DataFrame:
    """Load, format, and sort the leaderboard results for one task.

    Downloads the ``task_id`` config of the results dataset (always re-downloaded
    so the leaderboard reflects the latest submissions), prettifies column names,
    humanizes context sizes, rounds metrics, renders model/resource hyperlinks,
    and restricts the frame to the columns the UI expects for this task.
    """
    logging.info("Loading dataset: %s...", task_id)
    results_df = load_dataset(
        os.environ["DATASET_ID"], task_id, split="test", download_mode="force_redownload"
    ).to_pandas()
    results_df = results_df.rename(columns=COLUMNS_PRETTY, errors="ignore")

    def _humanize_context_size(raw):
        # Convert once (the original lambda called int(x) twice);
        # 16000 -> "16k", values below 1000 pass through unchanged.
        size = int(raw)
        return f"{size // 1000}k" if size >= 1000 else raw

    results_df["Context Size"] = results_df["Context Size"].map(_humanize_context_size)

    results_df = results_df.sort_values(by=SORT_COLUMN_PER_TASK[task_id], ascending=False)

    # BERTScore-family metrics need more decimal places than the rest.
    for metric_column in METRICS_PER_TASK[task_id]:
        precision = 5 if "BERTScore" in metric_column else 2
        results_df[metric_column] = results_df[metric_column].map(
            lambda x, p=precision: f"{x:.{p}f}"  # bind precision per iteration
        )

    # Link each model name to its homepage when a URL is present.
    results_df["Model Name"] = [
        model_hyperlink(link=link, model_name=model_name) if link else model_name
        for link, model_name in zip(results_df["model_url"], results_df["Model Name"])
    ]
    # Only the project-level code completion task carries per-row dataset links.
    if task_id == 'project_code_completion':
        results_df["Dataset Name"] = [_extract_dataset_name(urls) for urls in results_df["Dataset"]]
        results_df["Dataset"] = [_process_urls(urls) for urls in results_df["Dataset"]]
    results_df["Resources"] = [_process_urls(urls) for urls in results_df["Resources"]]
    return results_df[get_columns_per_task(task_id)]


def get_results_for_task(task_pretty: str) -> pd.DataFrame:
    """Return the leaderboard for a pretty-named task, or a stub if unavailable.

    Maps the display name back to its task id and serves real results only
    when that id is among the dataset's available configs.
    """
    task_id = TASKS_PRETTY_REVERSE[task_pretty]
    # Guard clause: fall back to the placeholder table for unavailable tasks.
    if task_id not in AVAILABLE_TASKS:
        logging.info(f"Generating leaderboard stub for {task_pretty}...")
        return _get_results_stub()
    logging.info(f"Retrieving results for {task_pretty}...")
    return _get_results_dataset(task_id)