Spaces:
Runtime error
Runtime error
| # app.py | |
| """ | |
| FastAPI app to inspect Hugging Face transformer model sizing: | |
| - Total & trainable parameter counts | |
| - Approximate memory usage in bytes / human-readable | |
| - Saved model disk size | |
| - Basic model config info | |
| To run locally: | |
| pip install fastapi "uvicorn[standard]" transformers torch | |
| uvicorn app:app --reload | |
| Endpoints: | |
| / → simple HTML form | |
| /inspect?model=bert-base-uncased → JSON sizing info | |
| """ | |
| import os | |
| import math | |
| import tempfile | |
| import shutil | |
| from typing import Optional | |
| from fastapi import FastAPI, Query, HTTPException | |
| from fastapi.responses import HTMLResponse, JSONResponse | |
| from pydantic import BaseModel | |
| from transformers import AutoModel, AutoConfig, AutoTokenizer | |
| from transformers.utils import logging as hf_logging | |
| import torch | |
# Keep the transformers library quiet: only error-level messages are emitted.
hf_logging.set_verbosity_error()

# ASGI application object; this is the `app` that `uvicorn app:app` serves.
app = FastAPI(
    title="Hugging Face Transformer Sizing API",
)
| # ---------- Helper Functions ---------- | |
def humanize_bytes(n: int) -> str:
    """Convert a byte count into a human-readable string.

    Values below 1024 are rendered verbatim as ``"<n> B"``. Larger values are
    scaled by powers of 1024 and rendered with two decimals, e.g.
    ``"1.50 KB"``.

    Args:
        n: Number of bytes.

    Returns:
        The formatted size. Values beyond the largest known unit are expressed
        in that largest unit instead of raising ``IndexError``.
    """
    if n < 1024:
        return f"{n} B"

    units = ("B", "KB", "MB", "GB", "TB", "PB", "EB")
    last = len(units) - 1

    # Integer scaling avoids the floating-point error of floor(log(n, 1024)),
    # and the index is clamped so very large inputs cannot run off the table.
    idx = 0
    scale = 1
    while idx < last and n >= scale * 1024:
        scale *= 1024
        idx += 1

    return f"{n / scale:.2f} {units[idx]}"
def model_parameter_counts(model: torch.nn.Module):
    """Tally parameter counts and their byte footprint for ``model``.

    Walks ``model.parameters()`` once. Every parameter contributes its element
    count and ``element_size() * numel()`` bytes to the totals; parameters
    with ``requires_grad`` set also contribute to the trainable totals.

    Returns:
        A dict with the keys ``total_params``, ``trainable_params``,
        ``approx_bytes``, ``trainable_bytes`` and the two ``*_human`` strings
        produced by ``humanize_bytes``.
    """
    param_count = 0
    trainable_count = 0
    byte_count = 0
    trainable_byte_count = 0

    for param in model.parameters():
        numel = param.numel()
        nbytes = param.element_size() * numel

        param_count += numel
        byte_count += nbytes

        if not param.requires_grad:
            continue
        trainable_count += numel
        trainable_byte_count += nbytes

    return {
        "total_params": param_count,
        "trainable_params": trainable_count,
        "approx_bytes": byte_count,
        "trainable_bytes": trainable_byte_count,
        "approx_bytes_human": humanize_bytes(byte_count),
        "trainable_bytes_human": humanize_bytes(trainable_byte_count),
    }
def folder_size_bytes(path: str) -> int:
    """Sum the sizes, in bytes, of all files found under ``path``.

    The tree is traversed with ``os.walk``. A file whose size cannot be read
    (``OSError``) contributes nothing to the total instead of aborting the
    walk.
    """

    def _size_or_zero(file_path: str) -> int:
        # Best effort: unreadable or vanished files count as zero bytes.
        try:
            return os.path.getsize(file_path)
        except OSError:
            return 0

    return sum(
        _size_or_zero(os.path.join(dirpath, name))
        for dirpath, _subdirs, names in os.walk(path)
        for name in names
    )
| # ---------- Pydantic Model ---------- | |
class InspectResult(BaseModel):
    """Schema of the JSON payload describing one inspected model.

    The fields mirror the keys of the result dict assembled by ``inspect``.
    The three optional fields default to ``None`` explicitly so the schema
    behaves the same under pydantic v1 (where a bare ``Optional[...]`` is
    already optional) and pydantic v2 (where it would be required).
    """

    # Model identifier exactly as supplied by the caller.
    model_id: str
    # Class name of the loaded model object.
    model_class: str
    # Selected configuration attributes of the model.
    config: dict
    # Parameter counts and byte sizes from model_parameter_counts().
    sizing: dict
    # On-disk size of the temporarily saved model; None when not measured.
    saved_size_bytes: Optional[int] = None
    # Human-readable rendering of saved_size_bytes; None when not measured.
    saved_size_human: Optional[str] = None
    # Free-form remarks (e.g. tokenizer could not be saved); None when empty.
    notes: Optional[str] = None
| # ---------- Routes ---------- | |
# Register the handler on the application; without this decorator the
# function is never reachable and "/" answers 404.
@app.get("/", response_class=HTMLResponse)
def index():
    """Serve the landing page.

    Returns:
        An ``HTMLResponse`` containing a small form that submits the chosen
        model ID to ``/inspect`` via GET (query parameter ``model``).
    """
    html = """
<html>
<head><title>Transformer Sizing</title></head>
<body style="font-family:Arial; max-width:700px; margin:40px auto;">
<h2>Hugging Face Transformer Sizing</h2>
<form action="/inspect" method="get">
<label>Enter Model ID (e.g. bert-base-uncased):</label><br>
<input type="text" name="model" value="bert-base-uncased" style="width:70%; padding:6px;">
<button type="submit" style="padding:6px;">Inspect</button>
</form>
<p>Examples: <code>bert-base-uncased</code>, <code>roberta-base</code>, <code>distilbert-base-uncased</code></p>
<hr>
<p>Results will appear in JSON format.</p>
</body>
</html>
"""
    return HTMLResponse(html)
# Register the handler on the application; without this decorator the
# "/inspect" endpoint advertised by the module docstring does not exist.
@app.get("/inspect", response_model=InspectResult)
def inspect(
    model: str = Query(..., description="Model ID, e.g. bert-base-uncased"),
    save_to_disk: bool = Query(True, description="Save to disk temporarily to get size (default True)"),
):
    """Inspect model parameters, memory, and size.

    Args:
        model: Model identifier passed to the ``from_pretrained`` loaders.
        save_to_disk: When true, the model (and, if possible, its tokenizer)
            is saved into a temporary directory to measure its on-disk size;
            the directory is always removed afterwards.

    Returns:
        A ``JSONResponse`` with the keys ``model_id``, ``model_class``,
        ``config``, ``sizing``, ``saved_size_bytes``, ``saved_size_human``
        and ``notes``.

    Raises:
        HTTPException: 400 when the model name is empty or its config cannot
            be loaded; 500 when the model cannot be loaded or saved.
    """
    if not model:
        raise HTTPException(status_code=400, detail="Missing model name.")

    # --- Load config ---
    try:
        config = AutoConfig.from_pretrained(model)
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Could not load config: {e}") from e

    # --- Load model safely to CPU ---
    try:
        model_obj = AutoModel.from_pretrained(model, config=config, torch_dtype=torch.float32).to("cpu")
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Could not load model: {e}") from e

    saved_size_bytes = None
    saved_size_human = None
    notes = ""

    try:
        sizing = model_parameter_counts(model_obj)
        model_class = model_obj.__class__.__name__

        # --- Compute disk size ---
        if save_to_disk:
            # Create the directory *before* entering the try block so the
            # finally clause never refers to an unbound name.
            tmp = tempfile.mkdtemp(prefix="hf_")
            try:
                try:
                    model_obj.save_pretrained(tmp)
                except Exception as e:
                    # Report save failures the same way as load failures.
                    raise HTTPException(status_code=500, detail=f"Could not save model: {e}") from e

                # The tokenizer is optional: failure only produces a note.
                try:
                    tok = AutoTokenizer.from_pretrained(model)
                    tok.save_pretrained(tmp)
                except Exception:
                    notes = "Tokenizer not saved."

                saved_size_bytes = folder_size_bytes(tmp)
                saved_size_human = humanize_bytes(saved_size_bytes)
            finally:
                shutil.rmtree(tmp, ignore_errors=True)
    finally:
        # Release the model on every path, including errors.
        del model_obj
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    # --- Build config summary ---
    summary_keys = ("hidden_size", "num_hidden_layers", "vocab_size", "num_attention_heads", "intermediate_size")
    summary = {k: getattr(config, k) for k in summary_keys if hasattr(config, k)}

    # --- Result ---
    result = {
        "model_id": model,
        "model_class": model_class,
        "config": summary,
        "sizing": sizing,
        "saved_size_bytes": saved_size_bytes,
        "saved_size_human": saved_size_human,
        "notes": notes or None,
    }
    return JSONResponse(result)
| # ---------- Local Run ---------- | |
if __name__ == "__main__":
    # Executed only when the file is run as a script: start uvicorn on all
    # interfaces, port 7860, with auto-reload enabled.
    import uvicorn

    uvicorn.run(
        "app:app",
        host="0.0.0.0",
        port=7860,
        reload=True,
    )