from llama_cpp import Llama
from huggingface_hub import hf_hub_download

# Download the single GGUF shard by its repo path:
model_path = hf_hub_download(
    repo_id="Inventors-Hub/SwarmChat-models",
    repo_type="model",
    filename="EuroLLM-9B-Instruct-Q4_K_M.gguf",
)
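By default, hf_hub_download stores the file in the shared Hugging Face cache (~/.cache/huggingface). If the Space needs the weights at a predictable path, the standard local_dir parameter pins the download location; a minimal sketch, assuming the same repo and filename:

# Sketch: pin the GGUF to ./models instead of the shared cache.
# local_dir is a standard hf_hub_download parameter.
model_path = hf_hub_download(
    repo_id="Inventors-Hub/SwarmChat-models",
    repo_type="model",
    filename="EuroLLM-9B-Instruct-Q4_K_M.gguf",
    local_dir="models",
)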
# Earlier, simpler initialization, kept for reference:
# llm = Llama(model_path=model_path, n_ctx=1024, verbose=True)
llm = Llama(
    model_path=model_path,
    n_ctx=512,        # down from 4096 to shrink the KV cache
    low_vram=True,    # llama.cpp low-VRAM mode
    f16_kv=True,      # half-precision KV cache
    use_mmap=True,    # memory-map the GGUF file rather than reading it all in
    use_mlock=False,  # do not pin model pages in RAM
)
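Note that low_vram and f16_kv are only accepted by older llama-cpp-python releases; newer versions removed them (worth verifying against the version pinned for this Space). A minimal sketch that sticks to long-stable constructor options:

# Sketch for recent llama-cpp-python releases, assuming low_vram/f16_kv
# are no longer available; only long-stable options are used.
llm = Llama(
    model_path=model_path,
    n_ctx=512,       # small context window keeps the KV cache small
    n_threads=4,     # illustrative thread count; tune to the Space's CPU
    n_gpu_layers=0,  # pure CPU; raise to offload layers if a GPU is present
    use_mmap=True,
    use_mlock=False,
    verbose=False,
)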
# print("Llama backend initialized successfully!")

# Translate arbitrary text to English using EuroLLM's ChatML prompt format.
def translate_text(text):
    input_prompt = f"""<|im_start|>system
<|im_end|>
<|im_start|>user
Translate the following text to English:
Text: {text}
English:
<|im_end|>
<|im_start|>assistant
"""
    output = llm(input_prompt, max_tokens=1024, temperature=0)
    translated_text = output.get("choices", [{}])[0].get("text", "").strip()
    return translated_text
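A quick usage sketch (the sample sentence and its expected output are illustrative only):

# Example call; prints something like "The weather is great today."
print(translate_text("O tempo está ótimo hoje."))

Since llama-cpp-python also exposes a chat API, the hand-assembled ChatML string could instead be delegated to create_chat_completion, which applies the model's own chat template; a sketch, not the Space's actual code (the function name is hypothetical):

def translate_text_chat(text):
    # Same translation task via llama-cpp-python's built-in chat API.
    result = llm.create_chat_completion(
        messages=[{
            "role": "user",
            "content": f"Translate the following text to English:\nText: {text}\nEnglish:",
        }],
        max_tokens=1024,
        temperature=0,
    )
    return result["choices"][0]["message"]["content"].strip()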