"""Gradio Space that compiles a TFLite INT8 model for the Synaptics Astra SR100
and reports estimated on-device performance and memory usage."""

import glob
import html
import os
import pathlib
import shutil
import tempfile

import gradio as gr
import spaces

import sr100_model_compiler


# ---------- Helpers ----------

def _resolve_uploaded_path(uploaded):
    """
    Normalize a Gradio ``File`` input into a filesystem path.

    Handles: str, dict with {path|name}, file-like objects with .path/.name,
    or a list/tuple of the above (first item wins). Returns None when no
    usable path can be extracted.
    """
    if uploaded is None:
        return None
    if isinstance(uploaded, (list, tuple)) and uploaded:
        # Gradio can hand back a one-element list even for file_count="single".
        return _resolve_uploaded_path(uploaded[0])
    if isinstance(uploaded, str):
        return uploaded
    if isinstance(uploaded, dict):
        return uploaded.get("path") or uploaded.get("name")
    for attr in ("path", "name"):
        if hasattr(uploaded, attr):
            return getattr(uploaded, attr)
    return None


def _banner(color, message):
    """Return one styled HTML status line.

    NOTE(review): the original HTML markup was lost in the source extraction;
    this styling is a reconstruction — confirm against the deployed Space.
    """
    return f"<div style='color:{color}; font-weight:bold;'>{message}</div>"


def _metrics_table(results):
    """Render the optimizer `results` dict as an HTML metrics table.

    Sizes arrive in bytes and are shown in kB; clock in Hz shown in MHz;
    inference time in seconds shown in ms.
    """
    weights_size = results['weights_size'] / 1000.0
    arena_size = results['arena_cache_size'] / 1000.0
    clock = results['core_clock'] / 1.0e6
    infer_time = results['inference_time'] * 1000.0
    infer_fps = results['inferences_per_sec']
    vmem_size = results['vmem_size'] / 1000.0
    lpmem_size = results['lpmem_size'] / 1000.0
    vmem_size_limit = results['vmem_size_limit'] / 1000.0
    lpmem_size_limit = results['lpmem_size_limit'] / 1000.0
    # Guard the percentage math so a zero limit cannot raise ZeroDivisionError.
    vmem_perc = (results['vmem_size'] * 100.0 / results['vmem_size_limit']
                 if results['vmem_size_limit'] else 0.0)
    lpmem_perc = (results['lpmem_size'] * 100.0 / results['lpmem_size_limit']
                  if results['lpmem_size_limit'] else 0.0)

    metrics = [
        ("Clock Frequency", f"{clock:0.1f} MHz"),
        ("Inference Time", f"{infer_time:0.1f} ms"),
        ("Inferences Per Second", f"{infer_fps:0.1f} fps"),
        ("Arena Cache Size", f"{arena_size:0.3f} kB"),
        ("Model Size", f"{weights_size:0.3f} kB"),
        ("Model Location", f"{results['model_loc']}"),
        ("System Configuration", f"{results['system_config']}"),
        ("VMEM Size",
         f"{vmem_size:0.3f} kB ({vmem_perc:0.1f}% of {vmem_size_limit:0.3f} kB limit)"),
        ("LPMEM Size",
         f"{lpmem_size:0.3f} kB ({lpmem_perc:0.1f}% of {lpmem_size_limit:0.3f} kB limit)"),
    ]
    rows = "".join(
        f"<tr><td>{label}</td><td>{value}</td></tr>" for label, value in metrics
    )
    return f"<table>{rows}</table>"


@spaces.GPU
def compile_model(model_name, vmem_value, lpmem_value, uploaded_model):
    """
    Compile a .tflite model for SR100 and return an HTML report string.

    Args:
        model_name: path of the bundled model chosen in the dropdown.
        vmem_value: VMEM budget in kB (slider value).
        lpmem_value: LPMEM budget in kB (slider value).
        uploaded_model: optional user upload; takes priority over model_name.

    Returns:
        HTML markup describing success/failure and estimated metrics.
    """
    # Decide the source model path (uploaded has priority).
    uploaded_path = _resolve_uploaded_path(uploaded_model)
    model_path = uploaded_path or model_name

    # Basic validations.
    if not model_path or not os.path.exists(model_path):
        return _banner(
            "red",
            "❌ ERROR: Could not locate the model file you selected or uploaded.")
    if pathlib.Path(model_path).suffix.lower() != ".tflite":
        return _banner("red", "❌ ERROR: Please provide a .tflite model file.")

    # Work in a temporary directory so uploaded models are cleaned up automatically.
    with tempfile.TemporaryDirectory() as out_dir:
        print(f"Created temporary directory: {out_dir}")

        # Sliders are in kB; the optimizer expects bytes (kB * 1000).
        vmem_size_limit = int(vmem_value * 1000)
        lpmem_size_limit = int(lpmem_value * 1000)

        try:
            # Copy the model under a dot-sanitized name: extra '.' in the stem
            # can confuse downstream tooling that splits on the first dot.
            original_file_name = os.path.basename(model_path)
            root, ext = os.path.splitext(original_file_name)
            safe_root = root.replace('.', '_')
            temp_model_path = os.path.join(out_dir, f"{safe_root}{ext}")
            print(f"Copying model to sanitized path: {temp_model_path}")
            # Stream the copy instead of reading the whole file into memory.
            shutil.copyfile(model_path, temp_model_path)

            print(f"Starting model optimization for {temp_model_path}")
            print(f"VMEM limit: {vmem_size_limit}, LPMEM limit: {lpmem_size_limit}")
            success, results = sr100_model_compiler.sr100_model_optimizer(
                model_file=temp_model_path,
                vmem_size_limit=vmem_size_limit,
                lpmem_size_limit=lpmem_size_limit,
                optimize='Performance',
            )
            print(f"Optimization complete. Success: {success}")
            print(f"Results: {results}")

            # Guard against an empty/None results payload.
            if not results:
                return _banner("red", "❌ ERROR: Optimization returned empty results")
        except Exception as e:
            error_message = str(e)
            print(f"Exception during model optimization: {error_message}")
            return (
                _banner("red", "❌ ERROR: Model optimization failed")
                + "<div>Error details:</div>"
                + f"<pre>{html.escape(error_message)}</pre>"
            )

        output = []

        if not success:
            print("Optimization reported failure. "
                  f"Reason: {results.get('failure_reason', 'Unknown')}")

        # cycles_npu == 0 means no operator was mapped onto the NPU (CPU-only
        # model). NOTE(review): nesting reconstructed from a flattened source —
        # this is the only reading that leaves every branch reachable; confirm.
        if results.get('cycles_npu', 0) == 0:
            output.append(_banner(
                "orange",
                "⚠️ CPU-ONLY: Model fits in memory but no operators mapped to the NPU"))
            output.append(
                "<div>This typically means the model contains ops not supported by "
                "the SR100 NPU. Please review/convert unsupported ops or choose an "
                "NPU-friendly model.</div>"
            )
            output.append("<div>Compiler log:</div>")
            output.append(
                f"<pre>{html.escape(results.get('vela_log', 'No log available'))}</pre>"
            )
        else:
            if success:
                output.append(_banner(
                    "green",
                    "✅ SUCCESS: Model fits on SR100 and below are the estimated "
                    "performance metrics"))
            else:
                output.append(_banner(
                    "red",
                    "❌ FAILURE: Model does not fit on SR100, Please check Memory "
                    "usage of Model"))
            # Show the metrics table for both fit-success and fit-failure so the
            # user can see which memory budget was exceeded.
            output.append(_metrics_table(results))

        return "".join(output)


# Get all available models bundled with the Space.
model_choices = glob.glob('models/*.tflite')

custom_css = """
:root {
    --color-accent: #007dc3;
    --color-primary-500: #007dc3;
    --color-primary-600: #007dc3;
}
body, .gradio-container, #root {
    background: #fff !important;
}
/* Hide Gradio footer and settings */
footer, .gradio-footer, .svelte-1ipelgc, .gradio-logo, .gradio-app__settings {
    display: none !important;
}
/* Style input labels and controls */
.gradio-slider label, .gradio-radio label, .gradio-dropdown label, .gradio-file label {
    color: #007dc3 !important;
    font-weight: bold;
}
.gradio-slider input[type="range"]::-webkit-slider-thumb,
.gradio-slider input[type="range"]::-moz-range-thumb,
.gradio-slider input[type="range"]::-ms-thumb {
    background: #007dc3 !important;
}
.gradio-radio input[type="radio"]:checked + span {
    background: #007dc3 !important;
    border-color: #007dc3 !important;
}
.gradio-dropdown select, .gradio-file input[type="file"] {
    border-color: #007dc3 !important;
}
.gradio-button {
    background: #007dc3 !important;
    color: #fff !important;
    border: none !important;
}
"""

with gr.Blocks(css=custom_css) as demo:
    # NOTE(review): the HTML inside these Markdown/HTML strings was lost in the
    # source extraction; the markup below is a reconstruction — restore the
    # original anchor hrefs before deploying.
    gr.Markdown(
        "<h1 style='text-align:center; color:#007dc3;'>SR100 Model Compiler</h1>",
        elem_id="main_title",
    )
    gr.Markdown(
        "<p style='text-align:center;'>Bring a TFlite INT8 model and compile it "
        "for Synaptics Astra SR100. Learn more at the "
        "<a href='https://developer.synaptics.com/'>Synaptics AI Developer Zone"
        "</a></p>",  # TODO confirm link target
        elem_id="subtitle",
    )
    gr.Markdown(
        """
SR100 includes the following on-chip SRAM memories:
- 1536 kB of Virtual Memory SRAM (VMEM) for high-speed operations.
- 1536 kB of Low Power SRAM (LPMEM) for images, audio, and other less-performance-critical data.

The amount of memory allocated to the model is customizable. Any memory not allocated to the model is usable by the application.
Ensure that the Arena cache size is smaller than the available VMEM to ensure it fits and runs optimally.
""",
        elem_id="memory_note",
    )

    with gr.Row():
        vmem_slider = gr.Slider(
            minimum=1, maximum=1536, step=1.024,
            label="Set total VMEM SRAM size available in kB", value=1536.0)
        lpmem_slider = gr.Slider(
            minimum=1, maximum=1536, step=1.024,
            label="Set total LPMEM SRAM size in kB", value=1536.0)

    model_dropdown = gr.Dropdown(
        label="Select a model",
        value='models/person_classification_256x448.tflite',
        choices=model_choices,
    )

    # File upload component; uploads live in a temp dir and are auto-deleted.
    model_upload = gr.File(
        label=("Or upload a .tflite INT8 model. Please note, Uploaded models are "
               "stored in a temporary directory and will be deleted automatically "
               "after processing."),
        file_types=[".tflite"],
        file_count="single",
    )

    # Run the compile.
    compile_btn = gr.Button("Compile Model")
    compile_text = gr.Markdown("Waiting for model results")

    compile_btn.click(
        compile_model,
        inputs=[model_dropdown, vmem_slider, lpmem_slider, model_upload],
        outputs=[compile_text],
    )

    gr.HTML(
        # TODO(review): original hrefs for the guide and dev-kit request were
        # lost in the extraction — restore them here.
        "<p>For a detailed walkthrough, please see our Evaluate Model Guide. "
        "This Space uses a simulation toolchain to estimate model performance "
        "providing results that closely reflect real hardware behavior.</p>"
        "<p>Request a Machina Micro [MCU] Dev Kit with Astra SR100 MCU.</p>"
    )


if __name__ == "__main__":
    demo.launch()