```python
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import CTransformers
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.vectorstores import FAISS
import time

# load and interpret the information in the documents
loader = DirectoryLoader("./infotext", glob="*.txt", loader_cls=TextLoader)
documents = loader.load()
splitter = RecursiveCharacterTextSplitter()
texts = splitter.split_documents(documents)
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={'device': 'cpu'})

# create and save the local database
db = FAISS.from_documents(texts, embeddings)
db.save_local("faiss")

# prepare the template we will use when prompting the AI
template = """Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Context: {context}
Question: {question}
Only return the helpful answer below and nothing else.
Helpful answer:
"""

# load the language model
config = {'max_new_tokens': 256, 'temperature': 0.01}
llm = CTransformers(model="TheBloke/Llama-2-13B-chat-GGML",
                    model_file="llama-2-13b-chat.ggmlv3.q2_K.bin",
                    model_type="llama",
                    config=config)

# load the interpreted information from the local database
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={'device': 'cpu'})
db = FAISS.load_local("faiss", embeddings)

# prepare a version of the llm pre-loaded with the local content
retriever = db.as_retriever(search_kwargs={'k': 2})
prompt = PromptTemplate(
    template=template,
    input_variables=['context', 'question'])

def query(question):
    model = RetrievalQA.from_chain_type(llm=llm,
                                        chain_type='stuff',
                                        retriever=retriever,
                                        return_source_documents=True,
                                        chain_type_kwargs={'prompt': prompt})
    time_start = time.time()
    output = model({'query': question})
    response = output["result"]
    time_elapsed = time.time() - time_start
    return [response, time_elapsed]
```
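For reference, a minimal sketch of how the `query()` helper above could be called, assuming the script has been run so the FAISS index and model are loaded; the question string is only an illustration, not part of the original app:

```python
# Ask a question against the indexed documents and report the timing.
# query() returns [answer_text, elapsed_seconds] as defined above.
answer, seconds = query("What topics are covered in the info texts?")
print(f"Answer: {answer}")
print(f"Answered in {seconds:.1f} seconds")
```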