from dataclasses import dataclass
from enum import Enum


@dataclass
class Task:
    benchmark: str
    metric: str
    col_name: str


# Select your tasks here
# ---------------------------------------------------
class Tasks(Enum):
    # task_key in the json file, metric_key in the json file, name to display in the leaderboard

    # --- Generation (Table 1 in Paper) ---
    # 1. LongText2Video
    longtext_clip = Task("LongText2Video", "clip_score", "LongText2Video / CLIP")
    longtext_dino = Task("LongText2Video", "dino_score", "LongText2Video / DINO")
    longtext_mllm = Task("LongText2Video", "mllm_judge", "LongText2Video / MLLM")

    # 2. Entities2Video
    entities_clip = Task("Entities2Video", "clip_score", "Entities2Video / CLIP")
    entities_dino = Task("Entities2Video", "dino_score", "Entities2Video / DINO")
    entities_mllm = Task("Entities2Video", "mllm_judge", "Entities2Video / MLLM")

    # 3. Video2Video
    v2v_clip = Task("Video2Video", "clip_score", "Video2Video / CLIP")
    v2v_dino = Task("Video2Video", "dino_score", "Video2Video / DINO")
    v2v_mllm = Task("Video2Video", "mllm_judge", "Video2Video / MLLM")

    # --- Long-Video Tasks (Table 2 in Paper) ---
    # 4. Understanding (Table 2a)
    understanding_acc = Task("Understanding", "accuracy", "LongVideo QA / Acc")

    # 5. Editing (Table 2b)
    editing_clip = Task("Editing", "clip_score", "Editing / CLIP")
    editing_dino = Task("Editing", "dino_score", "Editing / DINO")
    editing_mllm = Task("Editing", "mllm_judge", "Editing / MLLM")

    # 6. Segmentation (Table 2c)
    segmentation_j = Task("Segmentation", "j_score", "Segmentation / J")
    segmentation_f = Task("Segmentation", "f_score", "Segmentation / F")
    segmentation_jf = Task("Segmentation", "j_and_f", "Segmentation / J&F")
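
# Usage sketch (illustrative only, not wired into the app): how the Task
# fields above are typically consumed when assembling leaderboard columns.
# The results layout {benchmark: {metric: value}} is an assumption inferred
# from the "task_key / metric_key" comment, not a confirmed file format.
def _example_scores_for(results: dict) -> dict:
    """Return {col_name: score} for every Task whose score is in `results`."""
    return {
        task.value.col_name: results[task.value.benchmark][task.value.metric]
        for task in Tasks
        if task.value.metric in results.get(task.value.benchmark, {})
    }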
NUM_FEWSHOT = 0
# ---------------------------------------------------
# Your leaderboard name
TITLE = """<h1 align="center" id="space-title">UniVA-Bench Leaderboard</h1>"""
# What does your leaderboard evaluate?
INTRODUCTION_TEXT = """
UniVA-Bench is an agent-oriented benchmark for unified video intelligence,
covering Understanding, Generation, Editing, Segmentation, and agentic probing.
We report CLIP and DINO similarity scores, MLLM-judge preference, segmentation J / F / J&F,
and long-video QA accuracy, following the evaluation protocol described in our paper.
"""
# Which evaluations are you running? How can people reproduce what you have?
LLM_BENCHMARKS_TEXT = """
## How it works
This leaderboard reports the performance of UniVA and baseline models on **UniVA-Bench**, following
the evaluation protocol described in our paper. All scores are obtained with our internal evaluation
pipeline, using the same task definitions, metrics, and settings as in Section 4 and Appendix B.

## Reproducibility

Our implementation and evaluation scripts will be released in a public repository in a future update.
Until then, please refer to the paper for detailed descriptions of UniVA-Bench, including dataset
construction, splits, metrics, and experimental setups. If you need additional information to reproduce
these results, feel free to contact the authors.
"""
# Submission / queue
EVALUATION_QUEUE_TEXT = """
Submissions are currently disabled for the UniVA-Bench leaderboard.
Public submissions will open once the evaluation backend is ready.
"""
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
CITATION_BUTTON_TEXT = r"""@misc{liang2025univauniversalvideoagent,
      title={UniVA: Universal Video Agent towards Open-Source Next-Generation Video Generalist},
      author={Zhengyang Liang and Daoan Zhang and Huichi Zhou and Rui Huang and Bobo Li and
              Yuechen Zhang and Shengqiong Wu and Xiaohan Wang and Jiebo Luo and
              Lizi Liao and Hao Fei},
      year={2025},
      eprint={2511.08521},
      archivePrefix={arXiv},
      primaryClass={cs.CV},
      url={https://arxiv.org/abs/2511.08521}
}"""