windamir123 commited on
Commit
9f64b79
·
verified ·
1 Parent(s): b2cafbe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +188 -0
app.py CHANGED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ """
3
+ FastAPI app to inspect Hugging Face transformer model sizing:
4
+ - Total & trainable parameter counts
5
+ - Approximate memory usage in bytes / human-readable
6
+ - Saved model disk size
7
+ - Basic model config info
8
+
9
+ To run locally:
10
+ pip install fastapi "uvicorn[standard]" transformers torch
11
+ uvicorn app:app --reload
12
+
13
+ Endpoints:
14
+ / → simple HTML form
15
+ /inspect?model=bert-base-uncased → JSON sizing info
16
+ """
17
+
18
+ import os
19
+ import math
20
+ import tempfile
21
+ import shutil
22
+ from typing import Optional
23
+
24
+ from fastapi import FastAPI, Query, HTTPException
25
+ from fastapi.responses import HTMLResponse, JSONResponse
26
+ from pydantic import BaseModel
27
+ from transformers import AutoModel, AutoConfig, AutoTokenizer
28
+ from transformers.utils import logging as hf_logging
29
+ import torch
30
+
31
+ # quiet transformers logs
32
+ hf_logging.set_verbosity_error()
33
+
34
+ app = FastAPI(title="Hugging Face Transformer Sizing API")
35
+
36
+
37
+ # ---------- Helper Functions ----------
38
+
39
def humanize_bytes(n: int) -> str:
    """Convert a byte count into a human-readable string.

    Args:
        n: Non-negative number of bytes.

    Returns:
        A string such as ``"512 B"`` or ``"1.50 MB"``; values at or above
        1 TB are all expressed in TB.
    """
    if n < 1024:
        return f"{n} B"
    units = ["B", "KB", "MB", "GB", "TB"]
    # Clamp the exponent so sizes beyond the largest listed unit do not
    # index past the end of `units` (the original raised IndexError for
    # n >= 1024**5, i.e. >= 1 PB).
    i = min(int(math.floor(math.log(n, 1024))), len(units) - 1)
    return f"{n / (1024 ** i):.2f} {units[i]}"
46
+
47
+
48
def model_parameter_counts(model: torch.nn.Module):
    """Summarize parameter counts and approximate in-memory footprint.

    Args:
        model: Any ``torch.nn.Module``.

    Returns:
        Dict with total and trainable parameter counts, their approximate
        byte sizes (numel * element_size per tensor), and human-readable
        renderings of both byte totals.
    """
    total = trainable = 0
    bytes_total = bytes_trainable = 0
    for param in model.parameters():
        count = param.numel()
        size = count * param.element_size()
        total += count
        bytes_total += size
        if param.requires_grad:
            trainable += count
            bytes_trainable += size
    return {
        "total_params": total,
        "trainable_params": trainable,
        "approx_bytes": bytes_total,
        "trainable_bytes": bytes_trainable,
        "approx_bytes_human": humanize_bytes(bytes_total),
        "trainable_bytes_human": humanize_bytes(bytes_trainable),
    }
67
+
68
+
69
def folder_size_bytes(path: str) -> int:
    """Recursively sum the sizes of all regular files under *path*.

    Files that vanish or cannot be stat'ed mid-walk are silently skipped,
    so the result is a best-effort total.
    """
    size = 0
    for dirpath, _dirs, filenames in os.walk(path):
        for name in filenames:
            try:
                size += os.path.getsize(os.path.join(dirpath, name))
            except OSError:
                # File disappeared or is unreadable; keep going.
                continue
    return size
80
+
81
+
82
+ # ---------- Pydantic Model ----------
83
+
84
class InspectResult(BaseModel):
    """Response schema for the /inspect endpoint.

    NOTE(review): pydantic v2 treats the ``model_`` prefix as a protected
    namespace, so ``model_id``/``model_class`` may trigger warnings —
    confirm against the installed pydantic version.
    """

    model_id: str              # Hub model id echoed back to the caller
    model_class: str           # concrete class name instantiated by AutoModel
    config: dict               # subset of config fields (hidden_size, vocab_size, ...)
    sizing: dict               # output of model_parameter_counts()
    saved_size_bytes: Optional[int]   # on-disk size; None when save_to_disk=False
    saved_size_human: Optional[str]   # humanized rendering of saved_size_bytes
    notes: Optional[str]       # e.g. "Tokenizer not saved."; None when empty
92
+
93
+
94
+ # ---------- Routes ----------
95
+
96
@app.get("/", response_class=HTMLResponse)
def index():
    """Serve a minimal HTML form that submits a model id to /inspect."""
    page = """
    <html>
    <head><title>Transformer Sizing</title></head>
    <body style="font-family:Arial; max-width:700px; margin:40px auto;">
    <h2>Hugging Face Transformer Sizing</h2>
    <form action="/inspect" method="get">
    <label>Enter Model ID (e.g. bert-base-uncased):</label><br>
    <input type="text" name="model" value="bert-base-uncased" style="width:70%; padding:6px;">
    <button type="submit" style="padding:6px;">Inspect</button>
    </form>
    <p>Examples: <code>bert-base-uncased</code>, <code>roberta-base</code>, <code>distilbert-base-uncased</code></p>
    <hr>
    <p>Results will appear in JSON format.</p>
    </body>
    </html>
    """
    return HTMLResponse(page)
116
+
117
+
118
@app.get("/inspect", response_model=InspectResult)
def inspect(
    model: str = Query(..., description="Model ID, e.g. bert-base-uncased"),
    save_to_disk: bool = Query(True, description="Save to disk temporarily to get size (default True)")
):
    """Inspect a Hugging Face model's parameters, memory, and disk size.

    Loads the config and model (CPU, float32), counts parameters, and —
    when ``save_to_disk`` is true — saves model (and, best-effort, the
    tokenizer) to a temp dir to measure the serialized footprint.

    Raises:
        HTTPException 400: missing model name or config could not load.
        HTTPException 500: model weights could not load.
    """
    if not model:
        raise HTTPException(status_code=400, detail="Missing model name.")

    # --- Load config ---
    try:
        config = AutoConfig.from_pretrained(model)
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Could not load config: {e}")

    # --- Load model safely to CPU ---
    try:
        model_obj = AutoModel.from_pretrained(model, config=config, torch_dtype=torch.float32).to("cpu")
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Could not load model: {e}")

    sizing = model_parameter_counts(model_obj)

    # --- Compute disk size (best-effort) ---
    saved_size_bytes = None
    saved_size_human = None
    notes = ""
    if save_to_disk:
        # mkdtemp runs OUTSIDE the try so `tmp` is always bound when the
        # finally-block cleanup executes (the original could NameError in
        # `finally` if mkdtemp itself raised inside the try).
        tmp = tempfile.mkdtemp(prefix="hf_")
        try:
            model_obj.save_pretrained(tmp)
            try:
                tok = AutoTokenizer.from_pretrained(model)
                tok.save_pretrained(tmp)
            except Exception:
                notes = "Tokenizer not saved."
            saved_size_bytes = folder_size_bytes(tmp)
            saved_size_human = humanize_bytes(saved_size_bytes)
        except Exception as e:
            # Parameter sizing above is still valid; a failed disk
            # measurement should degrade to a note, not a 500.
            notes = f"Could not measure saved size: {e}"
        finally:
            shutil.rmtree(tmp, ignore_errors=True)

    # --- Build config summary ---
    summary = {}
    for k in ("hidden_size", "num_hidden_layers", "vocab_size", "num_attention_heads", "intermediate_size"):
        if hasattr(config, k):
            summary[k] = getattr(config, k)

    # --- Result ---
    result = {
        "model_id": model,
        "model_class": model_obj.__class__.__name__,
        "config": summary,
        "sizing": sizing,
        "saved_size_bytes": saved_size_bytes,
        "saved_size_human": saved_size_human,
        "notes": notes or None
    }

    # Drop the (possibly large) model reference before returning; clear the
    # CUDA allocator cache if a GPU happens to be present.
    del model_obj
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    return JSONResponse(result)
182
+
183
+
184
+ # ---------- Local Run ----------
185
# Local development entry point; in deployment (e.g. a HF Space) uvicorn is
# typically launched externally against "app:app" instead.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=True)
188
+