import pandas as pd

from scoring import METRIC_MAP, PRESET_CONFIGS
from config import POCKET_MODEL_THRESHOLD
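
# Assumed shapes of the imported config objects (illustrative sketch only —
# the authoritative definitions live in scoring.py / config.py):
#   METRIC_MAP:             {metric_name: (normalized_column_name, ...), ...}
#   PRESET_CONFIGS:         {preset_name: {normalized_column_name: weight, ...}, ...}
#   POCKET_MODEL_THRESHOLD: float, parameter count in billions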

class DevSuite:
    """Self-diagnostic suite: sanity-checks a scored model DataFrame and
    renders its findings as a Markdown report."""

    def __init__(self, df):
        self.df = df
        self.report = {
            "summary": {"critical": 0, "medium": 0, "low": 0, "tests_passed": 0},
            "critical_issues": [],
            "medium_issues": [],
            "low_issues": [],
            "anomalies": [],
            "statistics": {},
        }

    def run_all_tests(self):
        """Run every check and return the Markdown report."""
        if self.df is None or self.df.empty:
            self._add_issue("critical", "DataFrame is empty or None.")
            return self._generate_markdown_report()

        self._test_normalization_bounds()
        self._test_parameter_scaling()
        self._test_badges_logic()
        self._test_weight_sums()
        self._test_score_ranges()
        self._collect_normalization_stats()

        return self._generate_markdown_report()

    def get_anomalies_df(self):
        return pd.DataFrame(self.report["anomalies"]) if self.report["anomalies"] else pd.DataFrame()

    def get_statistics_df(self):
        return pd.DataFrame(self.report["statistics"]).T if self.report["statistics"] else pd.DataFrame()

    def _test_normalization_bounds(self):
        """Check that normalized metrics fall within [0, 1], allowing a small float tolerance."""
        norm_cols = [v[0] for v in METRIC_MAP.values() if v[0] in self.df.columns]

        for col in norm_cols:
            values = self.df[col].dropna()
            if values.empty:
                continue

            if values.min() < -1e-6 or values.max() > 1.0 + 1e-6:
                self._add_issue(
                    "critical",
                    f"Normalization bounds broken in '{col}': range [{values.min():.3f}, {values.max():.3f}]",
                )
            else:
                self.report["summary"]["tests_passed"] += 1

    def _test_parameter_scaling(self):
        """Check that parameters look scaled correctly (billions, not millions or arbitrary units)."""
        if 'Total Parameters' in self.df.columns:
            max_params = self.df['Total Parameters'].max()

            if pd.isna(max_params) or max_params == 0:
                self._add_issue("critical", "Parameter column appears empty or zeroed.")
            elif max_params < 100:
                self._add_issue(
                    "critical",
                    f"Parameter scaling suspicious: max parameter found is {max_params}. "
                    f"Expected > 100 for large models.",
                )
            else:
                self.report["summary"]["tests_passed"] += 1

    def _test_badges_logic(self):
        """Check that badges are assigned consistently with config thresholds."""
        if 'Badges' in self.df.columns and 'Total Parameters' in self.df.columns:
            pocket_models = self.df[self.df['Badges'].astype(str).str.contains("🤏", na=False)]

            if not pocket_models.empty:
                max_pocket_params = pocket_models['Total Parameters'].max()

                if max_pocket_params > POCKET_MODEL_THRESHOLD + 0.1:
                    self._add_issue(
                        "medium",
                        f"Pocket badge assigned to model with {max_pocket_params}B params "
                        f"(threshold: {POCKET_MODEL_THRESHOLD}B).",
                    )
                else:
                    self.report["summary"]["tests_passed"] += 1
            else:
                self.report["summary"]["tests_passed"] += 1

    def _test_weight_sums(self):
        """Check that each preset's weights sum to 1.0."""
        for preset, weights in PRESET_CONFIGS.items():
            if isinstance(weights, dict) and 'special_type' not in weights:
                total_weight = sum(weights.values())
                if abs(total_weight - 1.0) > 1e-4:
                    self._add_issue("medium", f"Preset '{preset}' weights sum to {total_weight:.2f} (expected 1.0)")
                else:
                    self.report["summary"]["tests_passed"] += 1

    def _test_score_ranges(self):
        """Check that final scores stay within reasonable bounds [0, 1.1]."""
        score_cols = [c for c in self.df.columns if c.startswith("Score_")]
        for col in score_cols:
            # Efficiency-style scores are excluded from this bounds check.
            if 'Efficiency' in col:
                continue

            vals = self.df[col].dropna()
            if not vals.empty and (vals.min() < 0 or vals.max() > 1.1):
                self._add_issue("medium", f"Score out of range in {col}: [{vals.min():.2f}, {vals.max():.2f}]")
            else:
                self.report["summary"]["tests_passed"] += 1

    def _collect_normalization_stats(self):
        """Collect descriptive statistics for each normalized column."""
        norm_cols = [v[0] for v in METRIC_MAP.values() if v[0] in self.df.columns]
        for col in norm_cols:
            values = self.df[col].dropna()

            if values.empty:
                self.report["statistics"][col] = {"min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0}
            else:
                self.report["statistics"][col] = {
                    "min": float(values.min()),
                    "max": float(values.max()),
                    "mean": float(values.mean()),
                    "std": float(values.std()),
                }

    def _add_issue(self, level, message):
        """Record an issue and bump the matching severity counter."""
        self.report["summary"][level] += 1
        self.report[f"{level}_issues"].append(message)

    def _generate_markdown_report(self):
        r = self.report
        md = [
            "## Executive Summary",
            f"- **Tests Passed**: {r['summary']['tests_passed']}",
            f"- **Critical Issues**: {r['summary']['critical']}",
            f"- **Medium Issues**: {r['summary']['medium']}",
        ]

        if r['critical_issues']:
            md.append("\n### 🔴 Critical Issues")
            md.extend([f"- {i}" for i in r['critical_issues']])

        if r['medium_issues']:
            md.append("\n### 🟠 Medium Issues")
            md.extend([f"- {i}" for i in r['medium_issues']])

        if r['low_issues']:
            md.append("\n### 🟡 Low Issues")
            md.extend([f"- {i}" for i in r['low_issues']])

        if not r['critical_issues'] and not r['medium_issues']:
            md.append("\n### ✅ System Status: Healthy")

        return "\n".join(md)