File size: 2,374 Bytes
1476834
f00d5c5
1476834
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
851b1ce
32a27b0
 
1476834
 
 
 
851b1ce
32a27b0
 
1476834
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import gradio as gr
print(gr.__version__)
from transformers import AutoModelForSeq2SeqLM, NllbTokenizer

# Checkpoint identifier handed to both `from_pretrained` calls below.
# NOTE(review): despite the name this looks like a Hugging Face Hub repo id
# ("user/model") rather than a URL — confirm before renaming.
MODEL_URL = "Tom9358/nllb-tatoeba-gos-nld-v1"
# Loaded once at import time; translate_text() reuses this single instance.
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_URL)
tokenizer = NllbTokenizer.from_pretrained(
    MODEL_URL,
    # NOTE(review): asks for the tokenizer files to be fetched again on every
    # start instead of reusing a cached copy — presumably a workaround for a
    # stale cache; confirm it is still needed, since it slows startup.
    force_download=True,
    # "gos_Latn" is the tag lang_to_code uses for "Grunnegs"; it is passed as
    # an extra special token, presumably because the stock NLLB vocabulary
    # does not include it — TODO confirm.
    additional_special_tokens=["gos_Latn"]
)

# Display name offered in the language dropdowns -> language tag handed to the
# tokenizer (src_lang / tgt_lang) and converted to the forced BOS token id in
# translate_text(). Tags use the NLLB / FLORES-200 "<ISO 639-3>_<script>" form.
lang_to_code = {
    "Grunnegs": "gos_Latn",
    "Nederlands": "nld_Latn",
    "English": "eng_Latn",
    # Spanish is "spa" in ISO 639-3; the previous "esp_Latn" is not an NLLB
    # tag, so it resolved to the unknown-token id instead of a language token.
    "Español": "spa_Latn",
    "Deutsch": "deu_Latn",
    # NOTE(review): "fry_Latn" does not appear to be one of the stock NLLB-200
    # tags — confirm this checkpoint's vocabulary actually contains it.
    "Frysk": "fry_Latn",
}

def translate_text(src_lang_name, tgt_lang_name, text):
    """Translate *text* from one listed language to another.

    Args:
        src_lang_name: Display name of the source language; expected to be a
            key of ``lang_to_code``.
        tgt_lang_name: Display name of the target language; expected to be a
            key of ``lang_to_code``.
        text: The text to translate. ``None`` is treated like empty input.

    Returns:
        The decoded translation as a string, or a short Dutch hint when there
        is nothing to translate, both languages are the same, or a language
        name is not present in ``lang_to_code``.
    """
    # `not text` also covers None, which would otherwise crash on .strip().
    if not text or not text.strip():
        return "Voer eerst tekst in."
    if src_lang_name == tgt_lang_name:
        return "Kies twee verschillende talen."

    # A cleared dropdown (or any unknown name) yields a hint instead of a
    # KeyError surfacing in the UI.
    src_lang = lang_to_code.get(src_lang_name)
    tgt_lang = lang_to_code.get(tgt_lang_name)
    if src_lang is None or tgt_lang is None:
        return "Kies een geldige bron- en doeltaal."

    # The tokenizer is a shared module-level object; its language settings are
    # overwritten on every call.
    tokenizer.src_lang = src_lang
    tokenizer.tgt_lang = tgt_lang

    # Input is truncated to at most 120 tokens.
    inputs = tokenizer(
        text,
        return_tensors="pt",
        padding="longest",
        truncation=True,
        max_length=120,
    )
    result = model.generate(
        **inputs.to(model.device),
        # Force the first generated token to be the target-language tag.
        forced_bos_token_id=tokenizer.convert_tokens_to_ids(tgt_lang),
        # Output budget scales with the tokenized input length.
        max_new_tokens=int(16 + 1.5 * inputs.input_ids.shape[1]),
    )
    # Only one text was submitted, so only the first decoded sequence is used.
    return tokenizer.batch_decode(result, skip_special_tokens=True)[0]

# Assemble the web UI: a heading, the source/target language pickers, an input
# box, an output box, and a button that runs translate_text() when clicked.
with gr.Blocks() as demo:
    gr.Markdown("## Dizze NLLB vertoaler is allenneg traind op t poar Grunnegs - Nederlands, mor kinst kieken wat t dut mit Engels, Spoans, Duuts, en Frais")

    # Both pickers offer every display name known to lang_to_code.
    with gr.Row():
        src_dropdown = gr.Dropdown(
            label="Van",
            value="Nederlands",
            choices=list(lang_to_code),
        )
        tgt_dropdown = gr.Dropdown(
            label="Naar",
            value="Grunnegs",
            choices=list(lang_to_code),
        )

    input_box = gr.Textbox(
        label="Voer tekst in",
        show_label=True,
        placeholder="Tekst hier...",
        lines=5,
        buttons=["copy"],
    )

    output_box = gr.Textbox(
        label="Vertaling",
        show_label=True,
        lines=5,
        buttons=["copy"],
    )

    translate_btn = gr.Button("Vertaal")

    # Clicking the button feeds (source, target, text) to translate_text and
    # writes its return value into the output box.
    translate_btn.click(
        fn=translate_text,
        inputs=[src_dropdown, tgt_dropdown, input_box],
        outputs=output_box,
    )

demo.launch()