| | import os |
| | import json |
| | import csv |
| | import datetime |
| | from email.utils import parseaddr |
| |
|
| | import gradio as gr |
| | import pandas as pd |
| | import numpy as np |
| |
|
| | from datasets import load_dataset |
| | from apscheduler.schedulers.background import BackgroundScheduler |
| | from huggingface_hub import HfApi |
| |
|
| | from scorer import instruction_scorer |
| | from content import format_error, format_warning, format_log, TITLE, INTRODUCTION_TEXT, SUBMISSION_TEXT, CITATION_BUTTON_LABEL, CITATION_BUTTON_TEXT, model_hyperlink |
| |
|
# Hub access token, read from the environment (None when the variable is unset).
TOKEN = os.environ.get("TOKEN")

# Every repository used by this app lives under this Hub namespace.
OWNER = "ucla-contextual"
TEST_DATASET = OWNER + "/contextual_test"
VAL_DATASET = OWNER + "/contextual_val"
SUBMISSION_DATASET = OWNER + "/submissions_internal"
CONTACT_DATASET = OWNER + "/contact_info"
RESULTS_DATASET = OWNER + "/results"
LEADERBOARD_PATH = OWNER + "/leaderboard"

# Shared Hub client used for uploads and for restarting the Space.
api = HfApi()

# Prefix embedded in the names of the files uploaded for each submission.
YEAR_VERSION = "2024"
| |
|
def read_json_file(filepath):
    """Load and return the JSON document stored at *filepath*.

    The file is decoded explicitly as UTF-8 (the interchange encoding for
    JSON) instead of relying on the platform's locale encoding, so that
    non-ASCII content is read the same way on every host.

    Args:
        filepath: Path of the JSON file to read.

    Returns:
        The decoded JSON value (whatever ``json.load`` produces).

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the content is not valid UTF-8 / JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    with open(filepath, encoding="utf-8") as infile:
        return json.load(infile)
| |
|
def save_json_file(filepath, data_dict):
    """Serialise *data_dict* as JSON into *filepath*, replacing any existing file.

    Args:
        filepath: Destination path; it is opened in write mode.
        data_dict: JSON-serialisable object to store.
    """
    with open(filepath, mode="w") as handle:
        json.dump(data_dict, fp=handle)
| | |
# Local folder that receives the scored submission files before upload.
os.makedirs("scored", exist_ok=True)

# Every dataset below is downloaded afresh at start-up rather than taken
# from the local cache.
# NOTE(review): `ignore_verifications` is deprecated in newer `datasets`
# releases in favour of `verification_mode` - confirm against the pinned version.

# Reference data for the test split.
test_data_files = {"test": "contextual_test.csv"}
test_dataset = load_dataset(
    TEST_DATASET,
    data_files=test_data_files,
    token=TOKEN,
    download_mode="force_redownload",
    ignore_verifications=True,
)

# Reference data for the validation split.
val_data_files = {"val": "contextual_val.csv"}
val_dataset = load_dataset(
    VAL_DATASET,
    data_files=val_data_files,
    token=TOKEN,
    download_mode="force_redownload",
    ignore_verifications=True,
)

# Published leaderboard results, one CSV per split.
results_data_files = {"test": "contextual_test_results.csv", "val": "contextual_val_results.csv"}
results = load_dataset(
    RESULTS_DATASET,
    data_files=results_data_files,
    token=TOKEN,
    download_mode="force_redownload",
    ignore_verifications=True,
)

# Contact details collected with each submission.
contacts_data_files = {"contacts": "contacts.csv"}
contact_infos = load_dataset(
    CONTACT_DATASET,
    data_files=contacts_data_files,
    token=TOKEN,
    download_mode="force_redownload",
    ignore_verifications=True,
)
| |
|
def get_dataframe_from_results(results, split):
    """Build the leaderboard table for one split.

    Args:
        results: Mapping from split name to an object exposing ``to_pandas()``.
        split: Name of the split to convert.

    Returns:
        A DataFrame without the ``URL`` column, ordered by the ``All``
        column from highest to lowest.
    """
    table = results[split].to_pandas()
    return table.drop(columns=["URL"]).sort_values(by=["All"], ascending=False)
| |
|
# pandas views of the reference data loaded above.
test_dataset_dataframe = test_dataset["test"].to_pandas()
val_dataset_dataframe = val_dataset["val"].to_pandas()

# Contact details as a DataFrame.
contacts_dataframe = contact_infos["contacts"].to_pandas()

# Initial leaderboard tables shown by the UI (built for "val" first, then "test").
val_results_dataframe, test_results_dataframe = (
    get_dataframe_from_results(results=results, split=split_name)
    for split_name in ("val", "test")
)
| |
|
def restart_space():
    """Ask the Hub to restart the Space identified by ``LEADERBOARD_PATH``."""
    api.restart_space(token=TOKEN, repo_id=LEADERBOARD_PATH)
| |
|
# Column datatypes handed to the leaderboard Dataframe components:
# three markdown columns followed by nine numeric columns.
TYPES = ["markdown"] * 3 + ["number"] * 9
| |
|
def _is_blank(value):
    """Return True when a form field is missing or holds only whitespace."""
    return value is None or not str(value).strip()


def _safe_filename_part(text):
    """Return *text* with every character that is not alphanumeric, '-', '_' or '.' replaced by '_'."""
    return "".join(ch if ch.isalnum() or ch in "-_." else "_" for ch in text)


def _fetch_hub_csv(repo_id, split, filename):
    """Download *filename* from the dataset repo *repo_id* afresh and return it as a DataFrame."""
    fresh = load_dataset(
        repo_id,
        data_files={split: filename},
        token=TOKEN,
        download_mode="force_redownload",
        ignore_verifications=True,
    )
    return fresh[split].to_pandas()


def _check_submission_file(file_path):
    """Raise gr.Error unless *file_path* holds ``{<one key>: {<100 entries>}}`` as JSON."""
    try:
        submission = read_json_file(file_path)
    except (OSError, ValueError) as err:
        # ValueError covers both invalid JSON and undecodable bytes.
        print("file format wrong here")
        raise gr.Error("Submission file format incorrect. Please refer to the format description!") from err

    if not isinstance(submission, dict) or len(submission) != 1:
        print("file format wrong here")
        raise gr.Error("Submission file format incorrect. Please refer to the format description!")

    predictions = next(iter(submission.values()))
    if not isinstance(predictions, dict):
        print("file format wrong here")
        raise gr.Error("Submission file format incorrect. Please refer to the format description!")
    if len(predictions) != 100:
        print("file not 100 here")
        raise gr.Error("File must contain exactly 100 predictions.")


def add_new_eval(
    model: str,
    method: str,
    url: str,
    path_to_file: str,
    organisation: str,
    mail: str,
):
    """Validate, score and publish a new validation-set submission.

    Steps, in order: validate the form fields and the uploaded JSON file,
    upload the raw file, score it with ``instruction_scorer`` against the
    validation data, upload the scored file, append a row to the published
    validation results CSV, and append the submitter's contact details to
    the contacts CSV.

    The results and contacts tables are re-downloaded from the Hub right
    before they are extended. Appending to the snapshots taken at start-up
    would overwrite every submission made since then, and building the row
    on top of ``get_dataframe_from_results`` would drop the stored ``URL``
    of all earlier rows.

    Args:
        model: Model name entered in the form.
        method: Method description entered in the form.
        url: Link to model information.
        path_to_file: Uploaded submission file as delivered by ``gr.File``
            (either a path string or an object with a ``name`` attribute),
            or None when nothing was attached.
        organisation: Submitting organisation.
        mail: Contact e-mail address.

    Returns:
        The success message wrapped by ``format_log``.

    Raises:
        gr.Error: When a field is empty, the e-mail has no "@", the
            (model, organisation) pair already exists, no file is attached,
            or the file does not have the expected format.
    """
    # The contact e-mail is deliberately kept out of the log output: the form
    # states that it is stored privately.
    print("printing all inputs:", model, method, url, path_to_file, organisation)

    if _is_blank(model):
        print("model none")
        raise gr.Error("Please provide a model name. Field empty!")

    if _is_blank(method):
        print("method none")
        raise gr.Error("Please provide a method. Field empty!")

    if _is_blank(organisation):
        print("org none")
        raise gr.Error("Please provide organisation information. Field empty!")

    _, parsed_mail = parseaddr(mail or "")
    if "@" not in parsed_mail:
        print("email here")
        raise gr.Error("Please provide a valid email address.")

    # A submission is a duplicate only when the same model name was already
    # submitted by the same organisation, so compare (model, organisation)
    # pairs taken row by row from the currently published results.
    published = _fetch_hub_csv(RESULTS_DATASET, "val", "contextual_val_results.csv")
    already_submitted = {
        (str(existing_model).lower(), str(existing_org).lower())
        for existing_model, existing_org in zip(published["Model"], published["Organisation"])
    }
    if (model.lower(), organisation.lower()) in already_submitted:
        print("model org combo here")
        raise gr.Error("This model has been already submitted.")

    if path_to_file is None:
        print("file missing here")
        raise gr.Error("Please attach a file.")

    # gr.File may hand over a plain path or an object exposing ``.name``.
    file_path = getattr(path_to_file, "name", path_to_file)
    _check_submission_file(file_path)

    time_atm = datetime.datetime.today()
    api.upload_file(
        repo_id=SUBMISSION_DATASET,
        path_or_fileobj=file_path,
        path_in_repo=f"{organisation}/{model}/{YEAR_VERSION}_raw_{time_atm}.json",
        repo_type="dataset",
        token=TOKEN,
    )

    scores = instruction_scorer(val_dataset_dataframe, file_path, model)

    # Model and organisation names may contain "/" (or other characters that
    # are not valid in a file name), so sanitise them for the local copy only;
    # the path inside the repository keeps the original names.
    scored_path = f"scored/{_safe_filename_part(organisation)}_{_safe_filename_part(model)}.json"
    save_json_file(scored_path, scores)

    api.upload_file(
        repo_id=SUBMISSION_DATASET,
        path_or_fileobj=scored_path,
        path_in_repo=f"{organisation}/{model}/{YEAR_VERSION}_scored_{time_atm}.json",
        repo_type="dataset",
        token=TOKEN,
    )

    eval_entry = {
        "Model": model,
        "Method": method,
        "Organisation": organisation,
        "URL": url,
        "All": scores["average"],
        "Time": scores["time"],
        "Shopping": scores["shopping"],
        "Navigation": scores["navigation-transportation"],
        "Abstract": scores["abstract"],
        "Application Usage": scores["app"],
        "Web Usage": scores["web"],
        "Infographic": scores["infographics"],
        "Miscellaneous Natural Scenes": scores["misc"],
    }

    # Fetch again right before writing to keep the window for lost updates small.
    val_results_dataframe = _fetch_hub_csv(RESULTS_DATASET, "val", "contextual_val_results.csv")
    val_results_dataframe = pd.concat([val_results_dataframe, pd.DataFrame([eval_entry])], ignore_index=True)
    val_results_dataframe.to_csv("contextual_val_results.csv", index=False)

    api.upload_file(
        repo_id=RESULTS_DATASET,
        path_or_fileobj="contextual_val_results.csv",
        path_in_repo="contextual_val_results.csv",
        repo_type="dataset",
        token=TOKEN,
    )

    contact_info = {
        "Model": model,
        "URL": url,
        "Organisation": organisation,
        "Mail": mail,
    }

    contacts_dataframe = _fetch_hub_csv(CONTACT_DATASET, "contacts", "contacts.csv")
    contacts_dataframe = pd.concat([contacts_dataframe, pd.DataFrame([contact_info])], ignore_index=True)
    contacts_dataframe.to_csv("contacts.csv", index=False)

    api.upload_file(
        repo_id=CONTACT_DATASET,
        path_or_fileobj="contacts.csv",
        path_in_repo="contacts.csv",
        repo_type="dataset",
        token=TOKEN,
    )

    return format_log(f"Model {model} submitted by {organisation} successfully! \nPlease refresh the val leaderboard, and wait a bit to see the score displayed")
| |
|
| |
|
def refresh():
    """Re-download the results dataset and rebuild both leaderboard tables.

    Returns:
        A ``(val, test)`` tuple of DataFrames, each produced by
        ``get_dataframe_from_results``.
    """
    split_files = {"test": "contextual_test_results.csv", "val": "contextual_val_results.csv"}
    latest = load_dataset(
        RESULTS_DATASET,
        data_files=split_files,
        token=TOKEN,
        download_mode="force_redownload",
        ignore_verifications=True,
    )
    val_table = get_dataframe_from_results(results=latest, split="val")
    test_table = get_dataframe_from_results(results=latest, split="test")
    return val_table, test_table
| |
|
def upload_file(files):
    """Return the ``name`` attribute of every item in *files*, in order."""
    return [uploaded.name for uploaded in files]
| |
|
| |
|
# NOTE(review): the original indentation of this section was lost; the nesting
# below is reconstructed from the statement order - confirm against the
# deployed layout.
with gr.Blocks() as demo:
    gr.HTML(TITLE)

    # Collapsible informational sections.
    with gr.Row():
        with gr.Accordion("🧐 Introduction", open=False):
            gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Row():
        with gr.Accordion("🎯 Submission Guidelines", open=False):
            gr.Markdown(SUBMISSION_TEXT, elem_classes="markdown-text")

    with gr.Row():
        with gr.Accordion("📙 Citation", open=False):
            citation_button = gr.TextArea(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                elem_id="citation-button",
            )

    # Read-only leaderboard tables, one tab per split.
    with gr.Tab("Results: Test"):
        leaderboard_table_test = gr.components.Dataframe(
            value=test_results_dataframe,
            datatype=TYPES,
            interactive=False,
            column_widths=["20%"],
        )
    with gr.Tab("Results: Val"):
        leaderboard_table_val = gr.components.Dataframe(
            value=val_results_dataframe,
            datatype=TYPES,
            interactive=False,
            column_widths=["20%"],
        )

    # refresh() returns (val, test), matching the order of the outputs below.
    refresh_button = gr.Button("Refresh")
    refresh_button.click(
        fn=refresh,
        inputs=[],
        outputs=[
            leaderboard_table_val,
            leaderboard_table_test,
        ],
    )

    # Submission form.
    with gr.Accordion("Submit a new model for evaluation"):
        with gr.Row():
            with gr.Column():
                model_name_textbox = gr.Textbox(label="Model name", type='text')
                method_textbox = gr.Textbox(label="Method (LMM or Aug LLM or any other)", type='text')
                url_textbox = gr.Textbox(label="URL to model information", type='text')
            with gr.Column():
                organisation = gr.Textbox(label="Organisation", type='text')
                mail = gr.Textbox(label="Contact email (will be stored privately, & used if there is an issue with your submission)", type='email')
                file_output = gr.File()

        submit_button = gr.Button("Submit Eval")
        submission_result = gr.Markdown()
        # The input order mirrors add_new_eval's parameter order.
        submit_button.click(
            fn=add_new_eval,
            inputs=[
                model_name_textbox,
                method_textbox,
                url_textbox,
                file_output,
                organisation,
                mail,
            ],
            outputs=submission_result,
        )
| |
|
# Restart the Space once an hour; the module-level dataset loads run again on
# every start-up.
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, trigger="interval", seconds=60 * 60)
scheduler.start()

demo.launch(debug=True)
| |
|