Commit d9ef651
Parent(s): a1c511d
debug updating system
app.py CHANGED

@@ -900,16 +900,46 @@ def process_uploaded_file(file, model_name=None, provider=None, agent_framework=
     except Exception as e:
         return None, f"Error processing file: {str(e)}"
 
+def clean_nan_values(record):
+    """Convert NaN values to None for proper CSV serialization."""
+    import math
+    cleaned = {}
+    for key, value in record.items():
+        if pd.isna(value) or (isinstance(value, float) and math.isnan(value)):
+            cleaned[key] = None
+        else:
+            cleaned[key] = value
+    return cleaned
+
 def aggregate_runs_to_csv():
     """
     Aggregate all JSON files in runs/ directory into results.csv.
     This consolidates all uploaded evaluation results into a single CSV file.
     Deduplicates records based on (Model, Provider, Agent Framework) combination,
     keeping the most recent entry for each unique combination.
+    Preserves existing records from results.csv that aren't in runs/ directory.
     """
     runs_path = get_runs_path()
     results_path = get_results_path()
 
+    # First, load existing results.csv to preserve models not in new uploads
+    existing_records_with_time = []
+    if results_path.exists():
+        try:
+            df_existing = load_df(results_path)
+            if len(df_existing) > 0:
+                # Convert existing records to dict format
+                for _, row in df_existing.iterrows():
+                    record = row.to_dict()
+                    # Clean NaN values
+                    record = clean_nan_values(record)
+                    # Use file modification time - 1 day as timestamp (older than new uploads)
+                    # This ensures new uploads take precedence, but existing records are preserved
+                    existing_mtime = results_path.stat().st_mtime - 86400 # 1 day ago
+                    existing_records_with_time.append((existing_mtime, record))
+        except Exception as e:
+            print(f"Warning: Error loading existing results.csv: {e}")
+
     # Gather all JSON files with their modification times
     records_with_time = []
     for path in runs_path.glob("*.json"):
@@ -921,7 +951,10 @@ def aggregate_runs_to_csv():
         except Exception as e:
             print(f"Warning: Skipping invalid JSON file {path}: {e}")
 
-    if not records_with_time:
+    # Combine existing records with new records from runs/
+    all_records_with_time = existing_records_with_time + records_with_time
+
+    if not all_records_with_time:
         # Create empty CSV with headers
         fixed_metadata = ["Model", "Provider", "Agent Framework", "Type"]
         fixed_metrics = ["Overall Pass Rate", "Avg Duration (s)", "Avg Cost ($)"]
@@ -931,12 +964,12 @@ def aggregate_runs_to_csv():
         return
 
     # Sort by modification time (most recent first)
-    records_with_time.sort(key=lambda x: x[0], reverse=True)
+    all_records_with_time.sort(key=lambda x: x[0], reverse=True)
 
     # Handle legacy column names and infer Type
     legacy_map = {"Notes": "Agent Framework", "Overall": "Overall Pass Rate"}
     processed_records = []
-    for mtime, record in records_with_time:
+    for mtime, record in all_records_with_time:
         for old_key, new_key in legacy_map.items():
             if old_key in record and new_key not in record:
                 record[new_key] = record.pop(old_key)