from typing import Dict, Any
from kokoro import KPipeline
import soundfile as sf
import torch
import io
import base64

class EndpointHandler:
    def __init__(self, model_dir: str):
        # model_dir is supplied by the Inference Endpoints runtime but is not
        # needed here: KPipeline loads the Kokoro weights itself.
        # lang_code='a' selects American English.
        self.pipeline = KPipeline(lang_code='a')

    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        # Expected payload shape: {"inputs": {"text": "...", "voice": "..."}}
        inputs = data.get("inputs", {})
        text = inputs.get("text")
        voice = inputs.get("voice")

        # The pipeline yields (graphemes, phonemes, audio) chunks; collect the
        # audio chunks in memory instead of writing them to disk.
        audio_segments = []
        for _, _, audio in self.pipeline(text, voice):
            audio_segments.append(audio)
        
        # Concatenate all audio chunks into a single waveform tensor.
        # torch.as_tensor avoids an extra copy when a chunk is already a tensor.
        full_audio = torch.cat([torch.as_tensor(a) for a in audio_segments])

        # Kokoro generates audio at 24 kHz.
        sample_rate = 24000
        audio_length_seconds = len(full_audio) / sample_rate
        
        # Encode the full waveform as WAV into an in-memory buffer.
        buffer = io.BytesIO()
        sf.write(buffer, full_audio.numpy(), sample_rate, format='WAV')
        buffer.seek(0)
        audio_bytes = buffer.read()
        audio_b64 = base64.b64encode(audio_bytes).decode("utf-8")
        return {
            "headers": {
                "Content-Disposition": "attachment; filename=output.wav",
                "Content-Type": "audio/wav"
            },
            "body": audio_b64,
            "statusCode": 200,
            "isBase64Encoded": True,
            "audio_length_seconds": audio_length_seconds
        }
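

# --- Usage sketch (not part of the original handler) ------------------------
# A minimal local smoke test showing how a caller might decode the base64 WAV
# returned by __call__. The payload shape mirrors the handler above; the voice
# name "af_heart" and the output filename are illustrative placeholders only.
if __name__ == "__main__":
    handler = EndpointHandler(model_dir=".")
    response = handler({
        "inputs": {
            "text": "Hello from the Kokoro endpoint.",
            "voice": "af_heart",
        }
    })

    # The handler returns the WAV file base64-encoded in "body".
    wav_bytes = base64.b64decode(response["body"])
    with open("output.wav", "wb") as f:
        f.write(wav_bytes)
    print(f"Wrote output.wav ({response['audio_length_seconds']:.2f} s of audio)")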