from llama_cpp import Llama
from huggingface_hub import hf_hub_download
# import spaces
import functools
# @spaces.CPU
@functools.lru_cache(maxsize=1)
def llm_gpu():
    # Download the single GGUF shard by its repo path; lru_cache ensures the
    # model is downloaded and loaded only once per process.
    model_path = hf_hub_download(
        repo_id="Inventors-Hub/SwarmChat-models",
        repo_type="model",
        filename="EuroLLM-9B-Instruct-Q4_K_M.gguf",
    )
    llm = Llama(
        model_path=model_path,
        n_ctx=512,        # down from 4096
        low_vram=True,    # llama.cpp low-VRAM mode
        f16_kv=True,      # half-precision KV cache
        use_mmap=True,    # memory-map the model file instead of reading it in
        use_mlock=False,  # don't pin model pages in RAM
    )
    return llm
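
# Should the model ever need to be reloaded (e.g. after the GGUF file changes
# on disk), the lru_cache can be reset explicitly before the next call:
#   llm_gpu.cache_clear()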
# Translate text to English with EuroLLM via a ChatML-style prompt.
def translate_text(text):
    # ChatML prompt; the control tokens stay flush-left so no stray
    # indentation leaks into the prompt text.
    input_prompt = f"""
<|im_start|>system
<|im_end|>
<|im_start|>user
Translate the following text to English:
Text: {text}
English:
<|im_end|>
<|im_start|>assistant
"""
    llm = llm_gpu()
    output = llm(input_prompt, max_tokens=1024, temperature=0)  # greedy decoding
    translated_text = output.get("choices", [{}])[0].get("text", "").strip()
    return translated_text
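
# Minimal usage sketch (hypothetical sample input; the first call is slow
# because it downloads and loads the model):
if __name__ == "__main__":
    sample = "Guten Morgen, wie geht es dir heute?"
    print(translate_text(sample))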