# app.py """ FastAPI app to inspect Hugging Face transformer model sizing: - Total & trainable parameter counts - Approximate memory usage in bytes / human-readable - Saved model disk size - Basic model config info To run locally: pip install fastapi "uvicorn[standard]" transformers torch uvicorn app:app --reload Endpoints: / → simple HTML form /inspect?model=bert-base-uncased → JSON sizing info """ import os import math import tempfile import shutil from typing import Optional from fastapi import FastAPI, Query, HTTPException from fastapi.responses import HTMLResponse, JSONResponse from pydantic import BaseModel from transformers import AutoModel, AutoConfig, AutoTokenizer from transformers.utils import logging as hf_logging import torch # quiet transformers logs hf_logging.set_verbosity_error() app = FastAPI(title="Hugging Face Transformer Sizing API") # ---------- Helper Functions ---------- def humanize_bytes(n: int) -> str: """Convert bytes → human-readable format.""" if n < 1024: return f"{n} B" units = ["B", "KB", "MB", "GB", "TB"] i = int(math.floor(math.log(n, 1024))) return f"{n / (1024 ** i):.2f} {units[i]}" def model_parameter_counts(model: torch.nn.Module): """Return parameter counts and approximate memory usage.""" total, trainable, bytes_total, bytes_trainable = 0, 0, 0, 0 for p in model.parameters(): n = p.numel() b = p.element_size() * n total += n bytes_total += b if p.requires_grad: trainable += n bytes_trainable += b return { "total_params": total, "trainable_params": trainable, "approx_bytes": bytes_total, "trainable_bytes": bytes_trainable, "approx_bytes_human": humanize_bytes(bytes_total), "trainable_bytes_human": humanize_bytes(bytes_trainable), } def folder_size_bytes(path: str) -> int: """Return folder size in bytes.""" total = 0 for root, _, files in os.walk(path): for f in files: fp = os.path.join(root, f) try: total += os.path.getsize(fp) except OSError: pass return total # ---------- Pydantic Model ---------- class InspectResult(BaseModel): model_id: str model_class: str config: dict sizing: dict saved_size_bytes: Optional[int] saved_size_human: Optional[str] notes: Optional[str] # ---------- Routes ---------- @app.get("/", response_class=HTMLResponse) def index(): """Simple web UI.""" html = """
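# Quick sanity check of the byte math above (illustrative only, not used by
# the app): a float32 parameter occupies 4 bytes, so a model with roughly
# 110M parameters (about the size of bert-base-uncased) needs on the order
# of 440 MB for its weights alone. humanize_bytes(440_000_000) reports this
# as "419.62 MB" because it divides by powers of 1024 rather than 1000.
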
def folder_size_bytes(path: str) -> int:
    """Return folder size in bytes."""
    total = 0
    for root, _, files in os.walk(path):
        for f in files:
            fp = os.path.join(root, f)
            try:
                total += os.path.getsize(fp)
            except OSError:
                pass
    return total


# ---------- Pydantic Model ----------

class InspectResult(BaseModel):
    model_id: str
    model_class: str
    config: dict
    sizing: dict
    saved_size_bytes: Optional[int] = None
    saved_size_human: Optional[str] = None
    notes: Optional[str] = None


# ---------- Routes ----------

@app.get("/", response_class=HTMLResponse)
def index():
    """Simple web UI."""
    html = """
    <!DOCTYPE html>
    <html>
      <head>
        <title>Hugging Face Transformer Sizing</title>
      </head>
      <body>
        <h1>Hugging Face Transformer Sizing</h1>
        <form action="/inspect" method="get">
          <input type="text" name="model" placeholder="bert-base-uncased" size="40" />
          <button type="submit">Inspect</button>
        </form>
        <p>Examples: bert-base-uncased, roberta-base, distilbert-base-uncased</p>
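        <!-- The form above submits a GET request to /inspect; the text field
             name "model" maps to the endpoint's required query parameter. -->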
        <p>Results will appear in JSON format.</p>
      </body>
    </html>
    """
    return HTMLResponse(html)

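# Illustrative exchange (field values are approximate and depend on the
# transformers version): GET /inspect?model=bert-base-uncased returns JSON
# shaped like InspectResult, e.g.
#   {
#     "model_id": "bert-base-uncased",
#     "model_class": "BertModel",
#     "config": {"hidden_size": 768, "num_hidden_layers": 12,
#                "vocab_size": 30522, "num_attention_heads": 12,
#                "intermediate_size": 3072},
#     "sizing": {"total_params": ..., "approx_bytes_human": "...", ...},
#     "saved_size_bytes": ..., "saved_size_human": "...", "notes": null
#   }
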
""" return HTMLResponse(html) @app.get("/inspect", response_model=InspectResult) def inspect( model: str = Query(..., description="Model ID, e.g. bert-base-uncased"), save_to_disk: bool = Query(True, description="Save to disk temporarily to get size (default True)") ): """Inspect model parameters, memory, and size.""" if not model: raise HTTPException(status_code=400, detail="Missing model name.") # --- Load config --- try: config = AutoConfig.from_pretrained(model) except Exception as e: raise HTTPException(status_code=400, detail=f"Could not load config: {e}") # --- Load model safely to CPU --- try: model_obj = AutoModel.from_pretrained(model, config=config, torch_dtype=torch.float32).to("cpu") except Exception as e: raise HTTPException(status_code=500, detail=f"Could not load model: {e}") sizing = model_parameter_counts(model_obj) # --- Compute disk size --- saved_size_bytes = None saved_size_human = None notes = "" if save_to_disk: try: tmp = tempfile.mkdtemp(prefix="hf_") model_obj.save_pretrained(tmp) try: tok = AutoTokenizer.from_pretrained(model) tok.save_pretrained(tmp) except Exception: notes = "Tokenizer not saved." saved_size_bytes = folder_size_bytes(tmp) saved_size_human = humanize_bytes(saved_size_bytes) finally: shutil.rmtree(tmp, ignore_errors=True) # --- Build config summary --- summary = {} for k in ("hidden_size", "num_hidden_layers", "vocab_size", "num_attention_heads", "intermediate_size"): if hasattr(config, k): summary[k] = getattr(config, k) # --- Result --- result = { "model_id": model, "model_class": model_obj.__class__.__name__, "config": summary, "sizing": sizing, "saved_size_bytes": saved_size_bytes, "saved_size_human": saved_size_human, "notes": notes or None } # cleanup del model_obj if torch.cuda.is_available(): torch.cuda.empty_cache() return JSONResponse(result) # ---------- Local Run ---------- if __name__ == "__main__": import uvicorn uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=True)