{
  "model_type": "zenvision",
  "architecture": "multimodal",
  "version": "1.0.0",
  "components": {
    "whisper": {
      "model_name": "openai/whisper-large-v2",
      "size_gb": 1.5,
      "task": "automatic-speech-recognition",
      "languages": 90
    },
    "bert": {
      "model_name": "bert-base-multilingual-cased",
      "size_mb": 400,
      "task": "text-embeddings",
      "languages": 104
    },
    "sentiment": {
      "model_name": "cardiffnlp/twitter-roberta-base-sentiment-latest",
      "size_mb": 200,
      "task": "sentiment-analysis",
      "accuracy": 0.94
    },
    "emotion": {
      "model_name": "j-hartmann/emotion-english-distilroberta-base",
      "size_mb": 300,
      "task": "emotion-detection",
      "emotions": ["joy", "sadness", "anger", "fear", "surprise", "disgust", "neutral"]
    },
    "translation": {
      "model_name": "Helsinki-NLP/opus-mt-en-mul",
      "size_mb": 500,
      "task": "translation",
      "language_pairs": 10
    }
  },
  "total_size_gb": 3.2,
  "supported_languages": ["en", "es", "fr", "de", "it", "pt", "zh", "ja", "ko", "ru", "ar", "hi"],
  "output_formats": ["srt", "vtt", "json"],
  "performance": {
    "transcription_accuracy": {
      "en": 0.972,
      "es": 0.958,
      "fr": 0.945,
      "de": 0.931,
      "it": 0.948,
      "pt": 0.952
    },
    "processing_speed": {
      "cpu_i7": "0.3x real-time",
      "gpu_rtx3080": "2.1x real-time",
      "gpu_rtx4090": "3.8x real-time"
    }
  },
  "requirements": {
    "python": ">=3.8",
    "ram_gb": 8,
    "storage_gb": 5,
    "gpu": "optional (CUDA compatible)"
  },
  "license": "MIT",
  "authors": ["ZenVision Team"],
  "contact": "team@zenvision.ai"
}