```python
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import CTransformers
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.vectorstores import FAISS
import time

# load and interpret the information in the documents
loader = DirectoryLoader("./infotext", glob="*.txt", loader_cls=TextLoader)
documents = loader.load()
splitter = RecursiveCharacterTextSplitter()
texts = splitter.split_documents(documents)
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={'device': 'cpu'})

# create and save the local database
db = FAISS.from_documents(texts, embeddings)
db.save_local("faiss")

# prepare the template we will use when prompting the AI
template = """Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Context: {context}
Question: {question}
Only return the helpful answer below and nothing else.
Helpful answer:
"""

# load the language model
config = {'max_new_tokens': 256, 'temperature': 0.01}
llm = CTransformers(model="TheBloke/Llama-2-13B-chat-GGML",
                    model_file="llama-2-13b-chat.ggmlv3.q2_K.bin",
                    model_type="llama",
                    config=config)

# load the interpreted information from the local database
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={'device': 'cpu'})
db = FAISS.load_local("faiss", embeddings)

# prepare a version of the llm pre-loaded with the local content
retriever = db.as_retriever(search_kwargs={'k': 2})
prompt = PromptTemplate(
    template=template,
    input_variables=['context', 'question'])

def query(question):
    model = RetrievalQA.from_chain_type(llm=llm,
                                        chain_type='stuff',
                                        retriever=retriever,
                                        return_source_documents=True,
                                        chain_type_kwargs={'prompt': prompt})
    time_start = time.time()
    output = model({'query': question})
    response = output["result"]
    time_elapsed = time.time() - time_start
    return [response, time_elapsed]
```
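For reference, a minimal sketch of how the `query()` helper above could be called, assuming the script has been run so the FAISS index and model are loaded; the question string is only an illustration, not part of the original app:

```python
# Ask a question against the indexed documents and report the timing.
# query() returns [answer_text, elapsed_seconds] as defined above.
answer, seconds = query("What topics are covered in the info texts?")
print(f"Answer: {answer}")
print(f"Answered in {seconds:.1f} seconds")
```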