# Tom9358's picture
# Force show label
# 32a27b0 verified
import gradio as gr
print(gr.__version__)
from transformers import AutoModelForSeq2SeqLM, NllbTokenizer
# Identifier passed to ``from_pretrained`` for both the model and the tokenizer
# (presumably a Hugging Face Hub repo id).
MODEL_URL = "Tom9358/nllb-tatoeba-gos-nld-v1"

# Load the fine-tuned sequence-to-sequence model once at start-up.
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_URL)

# Load the matching NLLB tokenizer. "gos_Latn" is registered as an additional
# special token, and force_download=True asks ``from_pretrained`` to fetch the
# tokenizer files again instead of reusing a cached copy.
tokenizer = NllbTokenizer.from_pretrained(
    MODEL_URL,
    additional_special_tokens=["gos_Latn"],
    force_download=True,
)
# Maps the language names shown in the UI dropdowns to the language-code
# tokens handed to the tokenizer (as ``src_lang`` / ``tgt_lang``) and used as
# the forced first token during generation.
lang_to_code = {
    "Grunnegs": "gos_Latn",
    "Nederlands": "nld_Latn",
    "English": "eng_Latn",
    # NLLB / FLORES-200 uses the ISO 639-3 code "spa" for Spanish; "esp_Latn"
    # is not a known language token.
    "Español": "spa_Latn",
    "Deutsch": "deu_Latn",
    # NOTE(review): confirm that the tokenizer actually contains a "fry_Latn"
    # token; this could not be verified from this file.
    "Frysk": "fry_Latn",
}
def translate_text(src_lang_name, tgt_lang_name, text):
    """Translate ``text`` from one UI language to another.

    Args:
        src_lang_name: Display name of the source language; must be a key of
            ``lang_to_code``.
        tgt_lang_name: Display name of the target language; must be a key of
            ``lang_to_code``.
        text: The text to translate. ``None``, an empty string, or
            whitespace-only text is treated as "no input".

    Returns:
        The decoded translation, or a short Dutch hint when there is no input
        text or when source and target language are the same.

    Raises:
        KeyError: If either language name is not a key of ``lang_to_code``.
    """
    # Guard against a missing payload (None) as well as blank input, so the
    # caller gets the hint instead of an AttributeError from ``None.strip()``.
    if text is None or not text.strip():
        return "Voer eerst tekst in."
    if src_lang_name == tgt_lang_name:
        return "Kies twee verschillende talen."

    src_lang = lang_to_code[src_lang_name]
    tgt_lang = lang_to_code[tgt_lang_name]

    # The language codes are set on the shared, module-level tokenizer before
    # the text is encoded.
    tokenizer.src_lang = src_lang
    tokenizer.tgt_lang = tgt_lang

    # Inputs longer than 120 tokens are truncated.
    inputs = tokenizer(
        text,
        return_tensors="pt",
        padding="longest",
        truncation=True,
        max_length=120,
    )

    result = model.generate(
        **inputs.to(model.device),
        # Start the output with the target-language code token.
        forced_bos_token_id=tokenizer.convert_tokens_to_ids(tgt_lang),
        # Output budget grows with the tokenized input length (shape[1]).
        max_new_tokens=int(16 + 1.5 * inputs.input_ids.shape[1]),
    )

    # A single text was encoded, so the first decoded sequence is the result.
    return tokenizer.batch_decode(result, skip_special_tokens=True)[0]
# Build the translation page: a heading, two language pickers, an input box,
# an output box and a button that runs ``translate_text``.
with gr.Blocks() as demo:
    gr.Markdown("## Dizze NLLB vertoaler is allenneg traind op t poar Grunnegs - Nederlands, mor kinst kieken wat t dut mit Engels, Spoans, Duuts, en Frais")

    # NOTE(review): the nesting here is reconstructed; presumably only the two
    # language dropdowns sit side by side in the row — confirm the layout.
    with gr.Row():
        src_dropdown = gr.Dropdown(
            label="Van",
            value="Nederlands",
            choices=list(lang_to_code),
        )
        tgt_dropdown = gr.Dropdown(
            label="Naar",
            value="Grunnegs",
            choices=list(lang_to_code),
        )

    input_box = gr.Textbox(
        label="Voer tekst in",
        show_label=True,
        placeholder="Tekst hier...",
        lines=5,
        buttons=["copy"],
    )
    output_box = gr.Textbox(
        label="Vertaling",
        show_label=True,
        lines=5,
        buttons=["copy"],
    )

    # Clicking the button passes both selected language names and the input
    # text to translate_text and writes its return value to the output box.
    translate_btn = gr.Button("Vertaal")
    translate_btn.click(
        fn=translate_text,
        inputs=[src_dropdown, tgt_dropdown, input_box],
        outputs=output_box,
    )

# Start the Gradio app.
demo.launch()