rrr-leaderboard

Running

rrr-leaderboard / app.py

pasha

Cleanup and init

e3ed394 6 months ago

1.14 kB

	import streamlit as st
	import pandas as pd

	# CSV file that feeds the leaderboard table
	DATA_FILE = "test_all.csv"

	# Read the file and trim stray whitespace around every column label,
	# so later lookups such as df["model"] match regardless of header padding
	df = pd.read_csv(DATA_FILE).rename(columns=str.strip)

	# Page header: title plus a short Markdown description with project links
	st.title("MSNP Accuracy Leaderboard")
	st.markdown(
	    # Spelled out line by line so the text does not depend on how the
	    # source file itself is indented.
	    "\n"
	    "[GitHub Repository](https://github.com/EvilFreelancer/rrr-benchmark)\n"
	    "The table shows the accuracy and performance of the models on the\n"
	    "[rrr-benchmark](https://huggingface.co/datasets/evilfreelancer/rrr-benchmark) dataset.\n"
	)

	# Sidebar filtering: everything inside the `with` block is rendered in the
	# sidebar container instead of the main page.
	with st.sidebar:
	    st.header("Filters")
	    # Offer each distinct, non-missing value of the "model" column, sorted.
	    model_options = st.multiselect(
	        "Select model:",
	        options=sorted(df["model"].dropna().unique()),
	    )

	# Apply filters: work on a copy so the loaded frame stays untouched.
	# An empty selection is falsy, so no filter is applied and all rows remain.
	filtered_df = df.copy()
	if model_options:
	    filtered_df = filtered_df[filtered_df["model"].isin(model_options)]

	# Per-column display formatters: each value is a callable that turns one
	# cell into its display string (percentage, 3 decimals, 1 decimal).
	format_dict = {
	    "accuracy": "{:.2%}".format,
	    "avg_response_time": "{:.3f}".format,
	    "avg_token_count": "{:.1f}".format,
	}

	# Rank rows by accuracy, best first, and renumber them from 0 so the
	# displayed index follows the ranking rather than the CSV row order.
	ranked_df = filtered_df.sort_values(by="accuracy", ascending=False, ignore_index=True)

	# Render the ranked table with the display formatters applied.
	st.dataframe(ranked_df.style.format(format_dict))