openhands openhands committed on
Commit
70749cd
·
1 Parent(s): 6d3b657

Show OpenHands on alternate agents page for shared models

Browse files

This commit was created by an AI assistant (OpenHands) on behalf of the user.

Co-authored-by: openhands <openhands@all-hands.dev>

Files changed (1) hide show
  1. alternative_agents_page.py +42 -4
alternative_agents_page.py CHANGED
@@ -13,9 +13,15 @@ This page is intentionally a single Overall view (no per-category
13
  subpages) — the alternative-agents dataset is small (one row per
14
  harness × model) and the goal is "show me all the alternatives at a
15
  glance", not "drill into Issue Resolution for Codex".
 
 
 
 
 
16
  """
17
  import matplotlib
18
  matplotlib.use('Agg')
 
19
  import gradio as gr
20
 
21
  from simple_data_loader import SimpleLeaderboardViewer
@@ -30,15 +36,45 @@ ALTERNATIVE_AGENTS_INTRO = """
30
  <h2>Alternative Agents</h2>
31
  <p>
32
  Third-party agent harnesses running the OpenHands Index benchmarks.
33
- These rows aren't part of the OpenHands ranking on the
34
- <a href="/home">Home</a> page they're tracked here as a comparison
35
- point. Cost and runtime numbers come from each harness's own
36
- instrumentation and aren't directly comparable across harnesses.
 
37
  </p>
38
  </div>
39
  """
40
 
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  def build_page():
43
  gr.HTML(ALTERNATIVE_AGENTS_INTRO)
44
 
@@ -57,6 +93,8 @@ def build_page():
57
  )
58
  return
59
 
 
 
60
  create_leaderboard_display(
61
  full_df=test_df,
62
  tag_map=test_tag_map,
 
13
  subpages) — the alternative-agents dataset is small (one row per
14
  harness × model) and the goal is "show me all the alternatives at a
15
  glance", not "drill into Issue Resolution for Codex".
16
+
17
+ To make same-model comparisons easier, the page also appends canonical
18
+ OpenHands rows for any language model that appears in the alternative
19
+ agent dataset. The match is exact, so ``Gemini-3-Pro`` and
20
+ ``Gemini-3.1-Pro`` remain distinct entries.
21
  """
22
  import matplotlib
23
  matplotlib.use('Agg')
24
+ import pandas as pd
25
  import gradio as gr
26
 
27
  from simple_data_loader import SimpleLeaderboardViewer
 
36
  <h2>Alternative Agents</h2>
37
  <p>
38
  Third-party agent harnesses running the OpenHands Index benchmarks.
39
+ To make direct comparisons easier, this page also includes the
40
+ canonical OpenHands row whenever the exact same language model appears
41
+ under an alternative harness. Cost and runtime numbers still come from
42
+ each harness's own instrumentation and aren't directly comparable
43
+ across harnesses.
44
  </p>
45
  </div>
46
  """
47
 
48
 
49
+ def _append_openhands_shared_models(
50
+ alternative_df: pd.DataFrame,
51
+ split: str,
52
+ ) -> pd.DataFrame:
53
+ if alternative_df.empty or "Language Model" not in alternative_df.columns:
54
+ return alternative_df
55
+
56
+ openhands_df, _ = get_full_leaderboard_data(
57
+ split,
58
+ agent_filter=SimpleLeaderboardViewer.AGENT_FILTER_OPENHANDS,
59
+ )
60
+ if openhands_df.empty or "Language Model" not in openhands_df.columns:
61
+ return alternative_df
62
+
63
+ alternative_models = set(
64
+ alternative_df["Language Model"].dropna().astype(str).str.strip()
65
+ )
66
+ if not alternative_models:
67
+ return alternative_df
68
+
69
+ openhands_shared_df = openhands_df[
70
+ openhands_df["Language Model"].astype(str).str.strip().isin(alternative_models)
71
+ ].copy()
72
+ if openhands_shared_df.empty:
73
+ return alternative_df
74
+
75
+ return pd.concat([alternative_df, openhands_shared_df], ignore_index=True, sort=False)
76
+
77
+
78
  def build_page():
79
  gr.HTML(ALTERNATIVE_AGENTS_INTRO)
80
 
 
93
  )
94
  return
95
 
96
+ test_df = _append_openhands_shared_models(test_df, split="test")
97
+
98
  create_leaderboard_display(
99
  full_df=test_df,
100
  tag_map=test_tag_map,