Spaces:

Loversofdeath
/

lore

Runtime error

App Files Files Community

lore / app.py

Loversofdeath

Upload 3 files

b41870d verified 9 months ago

raw

history blame contribute delete

2.17 kB

	import os
	import glob
	import uvicorn
	from fastapi import FastAPI
	from sentence_transformers import SentenceTransformer
	from sklearn.metrics.pairwise import cosine_similarity
	import numpy as np

	# FastAPI application instance; the route handlers in this file register on it.
	app = FastAPI()

	# Load the model used to create embeddings
	model = SentenceTransformer('sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2')

	# Directory containing the lore files
	LORE_DIR = './lore'

	# Text chunking parameters
	CHUNK_SIZE = 1000 # characters
	CHUNK_OVERLAP = 100 # overlap between chunks to keep the text coherent

	# Load and preprocess the lore
	def load_lore_chunks(lore_dir=None, chunk_size=None, chunk_overlap=None):
	    """Read every ``*.txt`` file in the lore directory and split it into overlapping chunks.

	    Args:
	        lore_dir: Directory to scan; defaults to the module-level ``LORE_DIR``.
	        chunk_size: Maximum chunk length in characters; defaults to
	            ``CHUNK_SIZE``.
	        chunk_overlap: Number of characters shared by consecutive chunks;
	            defaults to ``CHUNK_OVERLAP``.

	    Returns:
	        A list of non-empty, whitespace-stripped chunks, file by file in
	        sorted path order.

	    Raises:
	        ValueError: If ``chunk_overlap`` is not smaller than ``chunk_size``.
	    """
	    # Resolve defaults at call time so the module constants stay the single
	    # source of truth for callers that pass nothing.
	    if lore_dir is None:
	        lore_dir = LORE_DIR
	    if chunk_size is None:
	        chunk_size = CHUNK_SIZE
	    if chunk_overlap is None:
	        chunk_overlap = CHUNK_OVERLAP

	    step = chunk_size - chunk_overlap
	    if step <= 0:
	        # A negative step would make range() empty and silently yield nothing.
	        raise ValueError("chunk_overlap must be smaller than chunk_size")

	    chunks = []
	    # glob order depends on the file system; sort so that chunk order (and
	    # therefore the index of each embedding) is reproducible.
	    for path in sorted(glob.glob(os.path.join(lore_dir, '*.txt'))):
	        with open(path, 'r', encoding='utf-8') as f:
	            text = f.read()
	        # Clean junk characters: anything below U+0020 (including newlines
	        # and tabs) or above U+FFFF becomes a space.
	        text = ''.join(c if 0x20 <= ord(c) <= 0xFFFF else ' ' for c in text)
	        # Split into overlapping windows
	        for start in range(0, len(text), step):
	            chunk = text[start:start + chunk_size].strip()
	            if chunk:
	                chunks.append(chunk)
	            if start + chunk_size >= len(text):
	                # This window already reached the end of the text; any
	                # further window would only repeat its tail.
	                break
	    return chunks

	# Load the chunks and build their embeddings once, when the module is executed.
	# NOTE(review): if no chunks are found, encode() receives an empty list — confirm ./lore is populated.
	print("Идёт загрузка файлов...")
	lore_chunks = load_lore_chunks()
	lore_embeddings = model.encode(lore_chunks)
	print(f"Загружено {len(lore_chunks)} частей текста.")

	# Look up the lore chunk that best matches a question
	def find_best_answer(question):
	    """Return the lore chunk whose embedding is most similar to the question.

	    The question is encoded with the module-level ``model``, compared against
	    ``lore_embeddings`` by cosine similarity, and the chunk from ``lore_chunks``
	    at the index of the highest score is returned.
	    """
	    query_vector = model.encode([question])[0]
	    scores = cosine_similarity([query_vector], lore_embeddings)[0]
	    return lore_chunks[scores.argmax()]

	@app.get("/")
	def read_root():
	    """Serve the welcome message at the service root."""
	    greeting = "Добро пожаловать в Лор-Бота!"
	    return {"message": greeting}

	@app.get("/ask/")
	def ask_question(q: str):
	    """Answer the query parameter ``q`` with the best-matching lore chunk.

	    Returns a dict that echoes the question under ``"question"`` and carries
	    the result of ``find_best_answer`` under ``"answer"``.
	    """
	    return {"question": q, "answer": find_best_answer(q)}