Spaces:

VOIDER
/

UGI-Leaderboard-Presets

Running

App Files Files Community

UGI-Leaderboard-Presets / app.py

VOIDER

🛡️ Security & Stability Update

a597782 verified about 10 hours ago

raw

history blame contribute delete

23.4 kB

	import gradio as gr
	import pandas as pd
	import numpy as np
	import plotly.graph_objects as go
	import tempfile
	import os
	import re
	import math
	import threading
	import atexit
	import logging
	from data_loader import DataLoader
	from scoring import ScoringEngine, PRESET_CONFIGS
	from dev_tools import DevSuite
	from config import *
	from about import get_about_markdown

	# === SETUP ===
	# Configure root logging at INFO level and create this module's logger.
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	# Initialize the data loader
	loader = DataLoader()

	# Thread-safe globals
	# _cache_lock is held by get_dataframe() and invalidate_cache() while they touch _CACHED_DF.
	_cache_lock = threading.Lock()
	_CACHED_DF = None  # dataframe cached by get_dataframe(); None means "not loaded yet"
	_temp_files = []  # paths of exported CSV temp files, removed by cleanup_temp_files() at exit

	# === CLEANUP ===
	@atexit.register
	def cleanup_temp_files():
	    """Remove the temporary CSV files tracked in ``_temp_files`` at exit.

	    Cleanup is best-effort: a file that is already gone is skipped and any
	    other OS-level failure is logged at debug level instead of aborting
	    the remaining removals.
	    """
	    for path in _temp_files:
	        try:
	            # Try the removal directly; checking os.path.exists() first
	            # only opens a window for the file to vanish in between.
	            os.remove(path)
	        except FileNotFoundError:
	            continue
	        except OSError as exc:
	            logger.debug("Could not remove temp file %s: %s", path, exc)

	# === DATA ACCESS ===
	def get_dataframe():
	    """Return the cached dataframe, loading and scoring it on first use.

	    The whole check-and-load sequence runs under ``_cache_lock``. A
	    non-empty frame from the loader is passed through
	    ``ScoringEngine(...).calculate_all()``; an empty one is cached as-is.
	    """
	    global _CACHED_DF
	    with _cache_lock:
	        # Fast path: something is already cached.
	        if _CACHED_DF is not None:
	            return _CACHED_DF

	        if FORCE_REFRESH_ON_STARTUP:
	            logger.info("First load: Clearing cache...")
	            loader.clear_cache()

	        loaded = loader.load_data()
	        _CACHED_DF = loaded if loaded.empty else ScoringEngine(loaded).calculate_all()
	        return _CACHED_DF

	def invalidate_cache():
	    """Forget the in-memory dataframe and clear the loader's cache.

	    Returns:
	        Whatever ``loader.clear_cache()`` returns (callers join it into a
	        status message).
	    """
	    global _CACHED_DF
	    with _cache_lock:
	        _CACHED_DF = None
	        removed = loader.clear_cache()
	    return removed

	# === FORMATTING & UI HELPERS ===
	def get_header_content(df):
	    """Build the Markdown header shown at the top of the page.

	    Args:
	        df: The loaded dataframe, or None when nothing is loaded.

	    Returns:
	        A Markdown string with the title line and a status line holding
	        the loader's last-updated value, the model count and the PID.
	    """
	    count = len(df) if df is not None else 0
	    current_time = loader.last_updated
	    # Joined line by line so the Markdown never inherits source indentation
	    # (an indented line would render as a code block). Plain "|" replaces
	    # the "\|" sequence, which is an invalid escape in a non-raw string.
	    return "\n".join([
	        "",
	        "# 🏆 UGI Leaderboard: Presets Edition v3.7",
	        f"Last Updated: {current_time} | Models: {count} | PID: {os.getpid()}",
	        "",
	    ])

	def format_params(row):
	    """Format a row's parameter counts for display.

	    Args:
	        row: Mapping-like object (dict or Series) read via ``.get`` for
	            'Total Parameters' and 'Active Parameters', both in billions.

	    Returns:
	        "N/A" when the total is missing, non-numeric or not positive;
	        otherwise e.g. "70.0B", "500M" (totals below 1), or
	        "47.0B (Act: 13.0B)" when a positive active count is smaller than
	        the total.
	    """
	    def as_number(value):
	        # Coerce once up front so string-typed or junk cells cannot raise
	        # in the comparisons below.
	        try:
	            return float(value)
	        except (ValueError, TypeError):
	            return np.nan

	    def fmt(val):
	        # Only ever called with a positive float.
	        if val < 1:
	            return f"{val * 1000:.0f}M"
	        return f"{val:.1f}B"

	    total = as_number(row.get('Total Parameters', np.nan))
	    active = as_number(row.get('Active Parameters', np.nan))

	    if pd.isna(total) or total <= 0:
	        return "N/A"

	    formatted_total = fmt(total)

	    # Show active params if MoE
	    if pd.notna(active) and 0 < active < total:
	        return f"{formatted_total} (Act: {fmt(active)})"

	    return formatted_total

	def escape_markdown(text):
	    """Return ``str(text)`` with a backslash in front of each of ``[]()*_#~`` and the backtick."""
	    specials = "[]()*_#~`"
	    escapes = {ord(ch): "\\" + ch for ch in specials}
	    return str(text).translate(escapes)

	def format_model_link(row):
	    """Render a row's model name as a Markdown link when it has a usable URL.

	    Args:
	        row: Mapping-like object read via ``.get`` for 'author/model_name'
	            and 'Model Link'.

	    Returns:
	        "[escaped name](url)" when the link is an http(s) URL, otherwise
	        just the escaped name.
	    """
	    name = str(row.get('author/model_name', 'Unknown'))
	    link = row.get('Model Link', '')
	    safe_name = escape_markdown(name)

	    if isinstance(link, str):
	        url = link.strip()
	        # Require a real scheme rather than any string that merely starts with "http".
	        if url.startswith(('http://', 'https://')):
	            # A space or parenthesis would end the Markdown link target
	            # early, so percent-encode them.
	            for raw, encoded in ((' ', '%20'), ('(', '%28'), (')', '%29')):
	                url = url.replace(raw, encoded)
	            return f"[{safe_name}]({url})"

	    return safe_name

	def get_architecture_choices(df):
	    """List the distinct, sorted 'Architecture' values, skipping placeholders.

	    Returns an empty list when ``df`` is None or empty. Missing values and
	    the case-insensitive placeholders 'unknown', 'nan', 'null' and 'none'
	    are left out.
	    """
	    if df is None or df.empty:
	        return []

	    placeholders = ('unknown', 'nan', 'null', 'none')
	    choices = []
	    for arch in df['Architecture'].dropna().unique():
	        if str(arch).lower() in placeholders:
	            continue
	        choices.append(arch)

	    choices.sort()
	    return choices

	# === FILTERING LOGIC (Refactored) ===

	def apply_search_filter(df, query):
	    """Keep rows whose model name or architecture contains ``query``.

	    Args:
	        df: Dataframe with 'author/model_name' and 'Architecture' columns.
	        query: Free-text search string; a falsy value disables the filter.

	    Returns:
	        ``df`` itself when ``query`` is falsy, otherwise the matching rows.
	    """
	    if not query:
	        return df

	    def contains(column):
	        # regex=False: the query is typed by users, so it is matched as
	        # literal text. Input such as "c++" or "(" is not a valid regex
	        # and would otherwise raise.
	        return df[column].astype(str).str.contains(query, case=False, na=False, regex=False)

	    return df[contains('author/model_name') | contains('Architecture')]

	def apply_param_filter(df, param_min, param_max, proprietary):
	    """Filter rows by total parameter count.

	    Args:
	        df: Dataframe with a 'Total Parameters' column.
	        param_min: Lower bound (inclusive); None means 0.
	        param_max: Upper bound (inclusive); None means 99999.
	        proprietary: When true, rows without a known positive parameter
	            count are kept in addition to the in-range rows.

	    Returns:
	        The rows of ``df`` that pass the filter.
	    """
	    # Coerce so that string-typed or junk cells count as "unknown" instead
	    # of raising in the comparisons below.
	    totals = pd.to_numeric(df['Total Parameters'], errors='coerce')
	    has_params = totals.notna() & (totals > 0)

	    p_min = float(param_min) if param_min is not None else 0.0
	    p_max = float(param_max) if param_max is not None else 99999.0

	    in_range = (totals >= p_min) & (totals <= p_max)

	    keep = has_params & in_range
	    if proprietary:
	        # Include if in range OR if proprietary (no params known)
	        keep = keep | ~has_params

	    return df[keep]

	def apply_date_filter(df, date_preset, date_start, date_end):
	    """Filter rows by release date.

	    Args:
	        df: Dataframe, optionally with a 'Release Date' column.
	        date_preset: "All Time", "Last Week", "Last Month", "Last Year" or
	            "Custom Range"; any other value keeps every row.
	        date_start: Inclusive lower bound, used only for "Custom Range".
	        date_end: Inclusive upper bound, used only for "Custom Range".

	    Returns:
	        ``df`` itself for "All Time" or when the column is absent,
	        otherwise the rows selected by the date mask.
	    """
	    if 'Release Date' not in df.columns or date_preset == "All Time":
	        return df

	    released = pd.to_datetime(df['Release Date'], errors='coerce')
	    today = pd.Timestamp.now()
	    window_days = {"Last Week": 7, "Last Month": 30, "Last Year": 365}

	    if date_preset in window_days:
	        cutoff = today - pd.Timedelta(days=window_days[date_preset])
	        keep = released >= cutoff
	    else:
	        keep = pd.Series(True, index=df.index)
	        if date_preset == "Custom Range":
	            # Bounds that are blank or fail to parse are simply ignored.
	            if date_start:
	                lower = pd.to_datetime(date_start, errors='coerce')
	                if pd.notna(lower):
	                    keep = keep & (released >= lower)
	            if date_end:
	                upper = pd.to_datetime(date_end, errors='coerce')
	                if pd.notna(upper):
	                    keep = keep & (released <= upper)

	    return df[keep]

	def filter_leaderboard_pipeline(df, preset, query, param_min, param_max, proprietary,
	                                moe_only, thinking_mode, model_types, architecture, top_n,
	                                balance_filter, date_preset, date_start, date_end):
	    """Apply every leaderboard filter, rank by the preset score and format for display.

	    Args:
	        df: Scored dataframe (may be None or empty).
	        preset: Preset name; rows are ranked by the ``Score_<preset>`` column.
	        query: Free-text search over model name and architecture.
	        param_min, param_max, proprietary: See ``apply_param_filter``.
	        moe_only: Keep only rows whose active parameters are below the total.
	        thinking_mode: "Show All", "Hide Thinking" or "Only Thinking".
	        model_types: Selected type names among "Foundation", "Finetuned", "Merged".
	        architecture: Exact architecture to keep, or "All"/falsy for no filter.
	        top_n: Maximum number of rows to return.
	        balance_filter: "Show All" or a label containing "Perfect"/"Good"
	            (any other label uses the lowest threshold).
	        date_preset, date_start, date_end: See ``apply_date_filter``.

	    Returns:
	        ``(display_df, raw_df)``: the formatted table and the filtered,
	        sorted rows behind it. Both are empty dataframes when there is no
	        data, the preset's score column is missing, or nothing matches.
	    """
	    if df is None or df.empty:
	        return pd.DataFrame(), pd.DataFrame()

	    # 1. Search
	    df = apply_search_filter(df, query)

	    # 2. Parameters
	    df = apply_param_filter(df, param_min, param_max, proprietary)

	    # 3. MoE
	    if moe_only:
	        df = df[df['Active Parameters'] < df['Total Parameters']]

	    # 4. Thinking
	    if thinking_mode == "Hide Thinking":
	        df = df[~df['Is Thinking Model']]
	    elif thinking_mode == "Only Thinking":
	        df = df[df['Is Thinking Model']]

	    # 5. Types
	    selected_types = model_types or []  # tolerate None from the UI
	    type_mask = pd.Series(False, index=df.index)
	    for m_type, col in [("Foundation", "Is Foundation"), ("Finetuned", "Is Finetuned"), ("Merged", "Is Merged")]:
	        if m_type in selected_types and col in df.columns:
	            type_mask |= df[col]
	    # The type filter only applies when it matches something; an empty
	    # selection therefore leaves the rows untouched.
	    if type_mask.any():
	        df = df[type_mask]

	    # 6. Architecture
	    if architecture and architecture != "All":
	        df = df[df['Architecture'] == architecture]

	    # 7. Balance
	    if balance_filter and balance_filter != "Show All":
	        threshold = 0.7 if "Perfect" in balance_filter else (0.5 if "Good" in balance_filter else 0.3)
	        target_col = "Score_💎 Perfect Balance"
	        if target_col in df.columns:
	            df = df[df[target_col] >= threshold]

	    # 8. Date
	    df = apply_date_filter(df, date_preset, date_start, date_end)

	    # 9. Sorting & Top N
	    score_col = f"Score_{preset}"
	    if score_col not in df.columns:
	        return pd.DataFrame(), pd.DataFrame()

	    # head() needs an integer; the slider value may arrive as a float.
	    limit = int(top_n) if top_n is not None else None
	    df = df.sort_values(score_col, ascending=False).head(limit).copy()

	    if df.empty:
	        return pd.DataFrame(), pd.DataFrame()

	    # 10. Formatting for Display
	    display_df = df.copy()
	    display_df['Rank'] = range(1, len(display_df) + 1)
	    display_df['Model Name'] = display_df.apply(format_model_link, axis=1)
	    display_df['Parameters'] = display_df.apply(format_params, axis=1)
	    display_df['Architecture'] = display_df['Architecture'].astype(str)
	    if 'Release Date' in display_df.columns:
	        parsed = pd.to_datetime(display_df['Release Date'], errors='coerce')
	        display_df['Date'] = parsed.dt.strftime('%Y-%m-%d').fillna('-')
	    else:
	        # apply_date_filter tolerates a missing column, so do the same here.
	        display_df['Date'] = '-'

	    display_df = display_df.rename(columns={score_col: "⭐ Score"})
	    display_cols = ['Rank', 'Model Name', "⭐ Score", 'Date', 'Badges', 'Parameters', 'Architecture']

	    return display_df[display_cols], df

	# === COMPARISON & UTILS ===

	def search_models(df, query, limit=10):
	    """Return a Gradio update listing model names that contain ``query``.

	    Args:
	        df: Dataframe with an 'author/model_name' column, or None.
	        query: Text typed by the user; falsy clears the choices.
	        limit: Maximum number of matches to offer.

	    Returns:
	        ``gr.update(choices=[...])`` with at most ``limit`` names.
	    """
	    if not query or df is None:
	        return gr.update(choices=[])
	    # regex=False: match the typed text literally so characters such as
	    # "+" or "(" cannot raise a regex compilation error.
	    mask = df['author/model_name'].astype(str).str.contains(query, case=False, na=False, regex=False)
	    matches = df.loc[mask, 'author/model_name'].head(limit).tolist()
	    return gr.update(choices=matches)

	def compare_models(df, model_names_text):
	    """Build the radar chart and comparison table for the listed models.

	    Args:
	        df: Scored dataframe, or None.
	        model_names_text: Model names, one per line; blank lines are ignored.

	    Returns:
	        ``(figure, table)``. ``(None, empty DataFrame)`` when there is no
	        data, no names were given, or none of the names are in ``df``.
	    """
	    if df is None or not model_names_text:
	        return None, pd.DataFrame()

	    targets = [line.strip() for line in model_names_text.split('\n') if line.strip()]
	    subset = df[df['author/model_name'].isin(targets)].copy()

	    if subset.empty:
	        return None, pd.DataFrame()

	    def as_score(value):
	        # Missing, NaN or non-numeric metrics plot as 0 instead of raising.
	        try:
	            number = float(value)
	        except (TypeError, ValueError):
	            return 0.0
	        return 0.0 if pd.isna(number) else number

	    # Use metrics from config
	    labels = list(COMPARE_METRICS.keys())
	    cols = list(COMPARE_METRICS.values())

	    # Radar Chart
	    fig = go.Figure()
	    if labels:  # nothing to trace (and no first point to repeat) without metrics
	        plot_labels = labels + [labels[0]]
	        for _, row in subset.iterrows():
	            values = [as_score(row.get(col, 0)) for col in cols]
	            # Close the loop
	            values.append(values[0])

	            fig.add_trace(go.Scatterpolar(
	                r=values, theta=plot_labels,
	                fill='toself',
	                name=str(row['author/model_name'])[:30]
	            ))

	    fig.update_layout(polar=dict(radialaxis=dict(visible=True, range=[0, 1])), showlegend=True, height=500)

	    # Comparison Table
	    wanted = ['author/model_name', 'Total Parameters', 'Score_🌌 Divine RP'] + cols
	    # Drop duplicates (order kept) and columns the data lacks, so the
	    # table is as tolerant of missing metrics as the radar chart.
	    compare_cols = [col for col in dict.fromkeys(wanted) if col in subset.columns]

	    # Rename for display
	    rename_map = {
	        'author/model_name': 'Model',
	        'Total Parameters': 'Params',
	        'Score_🌌 Divine RP': 'Divine RP'
	    }
	    # Add metric renames
	    for label, col in COMPARE_METRICS.items():
	        rename_map[col] = label

	    compare_df = subset[compare_cols].rename(columns=rename_map)
	    return fig, compare_df

	def calculate_custom_score(df, weights_dict):
	    """Rank models by a user-defined weighted score and return the top 50.

	    Args:
	        df: Scored dataframe (may be None or empty).
	        weights_dict: Weights passed to ``ScoringEngine.calculate_weighted_score``.

	    Returns:
	        A table with 'Model', '⭐ Score', 'Params' and 'Badges' columns, or
	        an empty dataframe when there is no data.
	    """
	    if df is None or df.empty:
	        return pd.DataFrame()

	    # A throwaway engine computes only the weighted score, rounded to 3 places.
	    engine = ScoringEngine(df)
	    custom = engine.calculate_weighted_score(weights_dict).round(3)

	    # Attach the score to a copy so the caller's frame stays untouched.
	    ranked = df.copy()
	    ranked['Custom_Score'] = custom
	    ranked = ranked.sort_values('Custom_Score', ascending=False).head(50)

	    shown_columns = ['author/model_name', 'Custom_Score', 'Total Parameters', 'Badges']
	    labels = {'author/model_name': 'Model', 'Custom_Score': '⭐ Score', 'Total Parameters': 'Params'}
	    return ranked[shown_columns].copy().rename(columns=labels)

	def run_diagnostics(df):
	    """Run the ``DevSuite`` checks on ``df``.

	    Returns:
	        ``(report, anomalies_df, statistics_df)``; when there is no data,
	        an error message and two empty dataframes.
	    """
	    if df is None or df.empty:
	        return "❌ No data loaded", pd.DataFrame(), pd.DataFrame()

	    suite = DevSuite(df)
	    report = suite.run_all_tests()
	    anomalies = suite.get_anomalies_df()
	    statistics = suite.get_statistics_df()
	    return report, anomalies, statistics

	def clear_and_reload_ui():
	    """Clear the caches, reload the data and describe what happened.

	    Returns:
	        ``(new_df, status)``: the reloaded dataframe and a multi-line
	        status message listing the deleted cache entries and the row count.
	    """
	    deleted = invalidate_cache()
	    new_df = get_dataframe()
	    # str() each entry so non-string cache entries (e.g. paths) still join.
	    deleted_text = ', '.join(str(item) for item in deleted) if deleted else 'None'
	    # Guard against None, as the refresh handler does for the same call.
	    row_count = len(new_df) if new_df is not None else 0
	    status = f"✅ Cache cleared!\nDeleted: {deleted_text}\n🔄 Data reloaded: {row_count} rows"
	    return new_df, status

	def export_handler(df):
	    """Write ``df`` to a temporary CSV file and expose it for download.

	    Args:
	        df: The filtered dataframe to export (may be None or empty).

	    Returns:
	        A Gradio update that shows the file component with the CSV path,
	        or hides it when there is nothing to export or the export fails.
	    """
	    if df is None or df.empty:
	        return gr.update(value=None, visible=False)

	    try:
	        # Only the unique path is needed. Closing the handle right away
	        # avoids leaking a descriptor per export and lets pandas reopen
	        # the file by name.
	        with tempfile.NamedTemporaryFile(delete=False, suffix='.csv', mode='w', encoding='utf-8') as temp_file:
	            csv_path = temp_file.name
	        _temp_files.append(csv_path)  # Track for cleanup
	        df.to_csv(csv_path, index=False)
	        return gr.update(value=csv_path, visible=True)
	    except Exception as e:
	        # UI boundary: report the failure and hide the download widget.
	        logger.error("Export failed: %s", e)
	        return gr.update(value=None, visible=False)

	# === UI BUILDER ===

	with gr.Blocks() as demo:
	    # Builds the whole UI (layout first, event bindings second).
	    # NOTE(review): the container nesting below is reconstructed from the
	    # order of the layout statements; confirm it against the rendered UI.

	    # Initial Load
	    initial_df = get_dataframe()
	    df_state = gr.State(initial_df)
	    filtered_raw_state = gr.State()

	    # Dynamic Params for Sliders
	    actual_max_params = 100
	    if initial_df is not None and not initial_df.empty:
	        m = initial_df['Total Parameters'].max()
	        if pd.notna(m):
	            actual_max_params = math.ceil(m)

	    max_params_state = gr.State(actual_max_params)

	    # Header
	    header_md = gr.Markdown(get_header_content(initial_df))

	    with gr.Tabs():
	        with gr.Tab("🏅 Leaderboard"):
	            # Controls
	            with gr.Row(variant="panel", equal_height=True):
	                with gr.Column(scale=5):
	                    preset_dropdown = gr.Radio(
	                        choices=list(PRESET_CONFIGS.keys()) + ["⚡ Efficiency King"],
	                        value="🌌 Divine RP",
	                        label="🎯 Preset",
	                        interactive=True
	                    )
	                with gr.Column(scale=1, min_width=150):
	                    refresh_btn = gr.Button("🔄 Refresh Data", variant="secondary", size="lg")

	            # Filters
	            with gr.Accordion("⚙️ Hardware & Filters", open=False):
	                with gr.Row():
	                    param_min = gr.Slider(0, actual_max_params, 0, step=1, label="Min Parameters (B)")
	                    param_max = gr.Slider(0, actual_max_params, actual_max_params, step=1, label="Max Parameters (B)")

	                with gr.Row():
	                    date_preset = gr.Radio(
	                        choices=["All Time", "Last Week", "Last Month", "Last Year", "Custom Range"],
	                        value="All Time",
	                        label="📅 Release Date Filter"
	                    )
	                with gr.Row(visible=False) as custom_date_row:
	                    date_start = gr.Textbox(placeholder="YYYY-MM-DD", label="From")
	                    date_end = gr.Textbox(placeholder="YYYY-MM-DD", label="To")

	                with gr.Row():
	                    proprietary_check = gr.Checkbox(value=True, label="Include Proprietary (unknown params)")
	                    moe_check = gr.Checkbox(value=False, label="MoE Only")
	                    thinking_mode = gr.Radio(["Show All", "Hide Thinking", "Only Thinking"], value="Show All", label="Reasoning Models")
	                with gr.Row():
	                    model_types = gr.CheckboxGroup(["Foundation", "Finetuned", "Merged"], value=["Foundation", "Finetuned", "Merged"], label="Model Types")
	                    arch_dropdown = gr.Dropdown(["All"] + get_architecture_choices(initial_df), value="All", label="Architecture")
	                    top_n_slider = gr.Slider(10, 500, DEFAULT_TOP_N, step=10, label="Top N")

	                with gr.Row():
	                    balance_filter = gr.Radio(
	                        choices=["Show All", "💎 Perfect (≥0.7)", "🏅 Good (≥0.5)", "⚖️ Basic (≥0.3)"],
	                        value="Show All",
	                        label="🛡️ Robustness Filter",
	                        info="Filters out models with weak spots."
	                    )

	            search_box = gr.Textbox(label="🔍 Search Models", placeholder="e.g., Llama, Qwen...")
	            leaderboard_table = gr.Dataframe(
	                datatype=["number", "markdown", "number", "str", "str", "str", "str"],
	                wrap=True, interactive=False
	            )

	            with gr.Row():
	                with gr.Column(scale=1):
	                    export_btn = gr.Button("📥 Export CSV", variant="primary", size="sm")
	                with gr.Column(scale=4):
	                    export_file = gr.File(label="Download CSV", visible=False, height=50)

	        with gr.Tab("⚖️ Compare"):
	            gr.Markdown("### Compare Multiple Models")
	            with gr.Row():
	                with gr.Column(scale=2):
	                    search_compare = gr.Textbox(label="🔍 Search to Add Models", placeholder="Type model name...")
	                    search_results_radio = gr.Radio(choices=[], label="Select from results", interactive=True)
	                    add_model_btn = gr.Button("➕ Add Model", variant="secondary")
	                with gr.Column(scale=3):
	                    compare_textbox = gr.Textbox(label="📋 Comparing (one per line)", lines=8, placeholder="Add models using search...")

	            compare_btn = gr.Button("📊 Generate Comparison", variant="primary")
	            with gr.Row():
	                radar_plot = gr.Plot(label="📈 Radar Chart")
	                compare_table = gr.Dataframe(label="📊 Comparison Table")

	        with gr.Tab("🎨 Custom Weights"):
	            gr.Markdown("### Create Your Own Preset")
	            with gr.Row():
	                with gr.Column():
	                    w_textbook = gr.Slider(0, 1, 0.12, step=0.01, label="📚 Textbook Knowledge")
	                    w_popculture = gr.Slider(0, 1, 0.08, step=0.01, label="🎬 Pop Culture")
	                    w_worldmodel = gr.Slider(0, 1, 0.10, step=0.01, label="🌍 World Model")
	                    w_instruction = gr.Slider(0, 1, 0.10, step=0.01, label="📋 Instruction Following")
	                    w_style = gr.Slider(0, 1, 0.25, step=0.01, label="✍️ Writing Style")
	                with gr.Column():
	                    w_originality = gr.Slider(0, 1, 0.10, step=0.01, label="✨ Originality")
	                    w_dialogue = gr.Slider(0, 1, 0.15, step=0.01, label="💬 Dialogue Balance")
	                    w_unbound = gr.Slider(0, 1, 0.05, step=0.01, label="🔓 Unbound")
	                    w_redundancy = gr.Slider(0, 1, 0.05, step=0.01, label="🧹 Low Redundancy")

	            weight_sum_display = gr.Markdown("Total Weight: 1.00")
	            calc_custom_btn = gr.Button("🎯 Calculate Custom Score", variant="primary")
	            custom_results = gr.Dataframe(label="Top 50 Models")

	        with gr.Tab("📖 About"):
	            gr.Markdown(get_about_markdown(loader.last_updated))

	        # Diagnostics Tab (Conditional)
	        diag_btn, clear_btn = None, None
	        if SHOW_DIAGNOSTICS:
	            with gr.Tab("🛠️ Diagnostics"):
	                with gr.Row():
	                    diag_btn = gr.Button("🧪 Run Diagnostics", variant="primary")
	                    clear_btn = gr.Button("🗑️ Clear Cache & Reload Data", variant="stop")

	                cache_status = gr.Textbox(label="Status", lines=3, interactive=False)
	                diag_report = gr.Code(label="📋 Diagnostic Report", language="markdown")

	                with gr.Accordion("🔍 Anomalies", open=False):
	                    anomalies_table = gr.Dataframe(label="Detected Anomalies")
	                with gr.Accordion("📊 Statistics", open=False):
	                    stats_table = gr.Dataframe(label="Normalization Statistics")

	    # === EVENT BINDINGS ===

	    # 1. Date Toggle
	    date_preset.change(lambda x: gr.update(visible=(x == "Custom Range")), inputs=[date_preset], outputs=[custom_date_row])

	    # 2. Filter Inputs Bundle
	    # The order must match the parameters of filter_leaderboard_pipeline.
	    filter_inputs = [
	        df_state,           # 0
	        preset_dropdown,    # 1
	        search_box,         # 2
	        param_min,          # 3
	        param_max,          # 4
	        proprietary_check,  # 5
	        moe_check,          # 6
	        thinking_mode,      # 7
	        model_types,        # 8
	        arch_dropdown,      # 9
	        top_n_slider,       # 10
	        balance_filter,     # 11
	        date_preset,        # 12
	        date_start,         # 13
	        date_end            # 14
	    ]
	    filter_outputs = [leaderboard_table, filtered_raw_state]

	    # 3. Slider Interaction Wrapper
	    def slider_interaction(max_p_val, *args):
	        """Re-run the filters after a parameter slider moved.

	        ``args`` holds the values of ``filter_inputs`` in order. While the
	        range is narrower than [0, max_p_val] the "Include Proprietary"
	        checkbox is unticked and disabled, and the pipeline runs with
	        proprietary=False.
	        """
	        # Indices into filter_inputs:
	        # 0: df, 1: preset, 2: search
	        # 3: param_min, 4: param_max, 5: proprietary_check
	        p_min = args[3]
	        p_max = args[4]
	        prop_val = args[5]

	        is_restricted = (p_min > 0 or p_max < max_p_val)

	        new_prop_val = False if is_restricted else prop_val
	        new_interactive = not is_restricted
	        new_label = "Include Proprietary (Disabled by params)" if is_restricted else "Include Proprietary (unknown params)"

	        cb_update = gr.update(value=new_prop_val, label=new_label, interactive=new_interactive)

	        # Modify list for pipeline call
	        pipeline_args = list(args)
	        pipeline_args[5] = new_prop_val

	        table, raw = filter_leaderboard_pipeline(*pipeline_args)

	        return cb_update, table, raw

	    slider_args = [max_params_state] + filter_inputs
	    slider_outs = [proprietary_check] + filter_outputs

	    param_min.change(slider_interaction, inputs=slider_args, outputs=slider_outs)
	    param_max.change(slider_interaction, inputs=slider_args, outputs=slider_outs)

	    # 4. General Filter Change
	    for i, inp in enumerate(filter_inputs):
	        # Skip df(0), and parameters sliders (3, 4) to avoid double firing
	        if i not in [0, 3, 4]:
	            inp.change(filter_leaderboard_pipeline, inputs=filter_inputs, outputs=filter_outputs)

	    # 5. Refresh
	    def refresh_wrapper():
	        """Reload the data and refresh the architecture choices and header."""
	        invalidate_cache()
	        new_df = get_dataframe()
	        if new_df is not None and not new_df.empty:
	            archs = ["All"] + get_architecture_choices(new_df)
	            return new_df, gr.update(choices=archs), get_header_content(new_df)
	        return new_df, gr.update(), get_header_content(None)

	    refresh_btn.click(refresh_wrapper, outputs=[df_state, arch_dropdown, header_md]).then(
	        filter_leaderboard_pipeline, inputs=filter_inputs, outputs=filter_outputs
	    )

	    # 6. Export
	    export_btn.click(export_handler, inputs=[filtered_raw_state], outputs=[export_file])

	    # 7. Compare
	    def append_model(current, selected):
	        """Append the selected model name to the comparison list, one per line."""
	        current = current or ""  # an empty textbox may arrive as None
	        if not selected:
	            return current
	        return current + ("\n" if current else "") + selected

	    search_compare.change(search_models, inputs=[df_state, search_compare], outputs=[search_results_radio])
	    add_model_btn.click(append_model, inputs=[compare_textbox, search_results_radio], outputs=[compare_textbox])
	    compare_btn.click(compare_models, inputs=[df_state, compare_textbox], outputs=[radar_plot, compare_table])

	    # 8. Custom Weights
	    weight_inputs = [w_textbook, w_popculture, w_worldmodel, w_instruction, w_style, w_originality, w_dialogue, w_unbound, w_redundancy]
	    weight_names = ['Textbook', 'Pop Culture', 'World Model', 'Instruction', 'Writing Style', 'Originality', 'Dialogue', 'Unbound', 'Redundancy']

	    def update_weight_total(*weights):
	        """Format the running total of all weight sliders."""
	        # Gradio passes one positional argument per input component, so
	        # the handler must accept *weights rather than a single list.
	        return f"Total Weight: {sum(weights):.2f}"

	    for w in weight_inputs:
	        w.change(update_weight_total, inputs=weight_inputs, outputs=[weight_sum_display])

	    calc_custom_btn.click(
	        lambda df, *args: calculate_custom_score(df, dict(zip(weight_names, args))),
	        inputs=[df_state] + weight_inputs,
	        outputs=[custom_results]
	    )

	    # 9. Diagnostics
	    # Compare against None explicitly instead of relying on component truthiness.
	    if SHOW_DIAGNOSTICS and diag_btn is not None and clear_btn is not None:
	        diag_btn.click(run_diagnostics, inputs=[df_state], outputs=[diag_report, anomalies_table, stats_table])
	        clear_btn.click(clear_and_reload_ui, outputs=[df_state, cache_status]).then(
	            filter_leaderboard_pipeline, inputs=filter_inputs, outputs=filter_outputs
	        )

	    # Load on Start
	    demo.load(filter_leaderboard_pipeline, inputs=filter_inputs, outputs=filter_outputs)

	# Launch the app only when this file is run as a script.
	if __name__ == "__main__":
	    demo.launch()