File size: 2,198 Bytes
de1df0d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
import os
import keyboard
import time
import requests
# SECURITY: API tokens must never be hard-coded in source. The token that was
# previously committed on this line has been exposed and should be revoked in
# the Hugging Face account settings. Provide a fresh token through the process
# environment instead, e.g.  `export HUGGINGFACEHUB_API_TOKEN=<your token>`.
if not os.environ.get("HUGGINGFACEHUB_API_TOKEN"):
    import warnings

    warnings.warn(
        "HUGGINGFACEHUB_API_TOKEN is not set; export it in the environment "
        "before using the Hugging Face Hub model.",
        RuntimeWarning,
    )
#from langchain.vectorstores.weaviate import Weaviate
from langchain.document_loaders import TextLoader #for textfiles
from langchain.text_splitter import CharacterTextSplitter #text splitter
from langchain.embeddings import HuggingFaceEmbeddings #for using HugginFace models
# Vectorstore: https://python.langchain.com/en/latest/modules/indexes/vectorstores.html
from langchain.vectorstores import FAISS  # FAISS (Facebook AI Similarity Search) vector store
from langchain.chains.question_answering import load_qa_chain
from langchain import HuggingFaceHub
from langchain.document_loaders import UnstructuredPDFLoader #load pdf
from langchain.indexes import VectorstoreIndexCreator #vectorize db index with chromadb
from langchain.chains import RetrievalQA
from langchain.document_loaders import UnstructuredURLLoader #load urls into docoument-loader
import requests
import textwrap
from langchain.document_loaders import TextLoader
# Load the knowledge-base text file through LangChain's TextLoader so the
# rest of the script can work with the resulting document list.
loader = TextLoader(file_path='./KS-all-info_rev1.txt')
documents = loader.load()
def wrap_text_preserve_newlines(text: str, width: int = 110) -> str:
    """Wrap *text* to *width* columns while keeping its existing line breaks.

    The input is split on ``'\\n'`` and each resulting line is wrapped
    independently with ``textwrap.fill``, so newlines already present in
    *text* (including empty lines) survive in the output.

    Args:
        text: The text to wrap.
        width: Maximum length of each output line; passed to ``textwrap.fill``.

    Returns:
        The wrapped text, with lines joined by ``'\\n'``.
    """
    # Wrapping line by line is what preserves the original newlines; calling
    # textwrap.fill on the whole string would reflow them into spaces.
    return '\n'.join(
        textwrap.fill(line, width=width) for line in text.split('\n')
    )
# --- Chunking: split the loaded documents into pieces for indexing ---------
text_splitter = CharacterTextSplitter(
    chunk_overlap=10,
    chunk_size=3000,
)
docs = text_splitter.split_documents(documents)

# --- Embeddings and vector store -------------------------------------------
# Vectorstore docs: https://python.langchain.com/en/latest/modules/indexes/vectorstores.html
embeddings = HuggingFaceEmbeddings()
db = FAISS.from_documents(docs, embeddings)

# --- Language model and question-answering chain ---------------------------
llm = HuggingFaceHub(
    repo_id="MBZUAI/LaMini-Flan-T5-783M",
    model_kwargs=dict(temperature=0, max_length=512),
)
# "stuff" places all documents handed to the chain into a single prompt.
chain = load_qa_chain(llm, chain_type="stuff")
def run_chain(query, k=4):
    """Answer *query* using the QA chain over the most relevant chunks.

    Previously every chunk in ``docs`` was passed to the "stuff" chain on
    each call and the FAISS index ``db`` was never consulted. The chunks are
    now retrieved from ``db`` by similarity to the question, so only the
    top *k* matches are placed in the prompt.

    Args:
        query: The question to answer.
        k: Number of chunks to retrieve from the vector store.

    Returns:
        Whatever ``chain.run`` returns for the retrieved chunks and question.
    """
    relevant_docs = db.similarity_search(query, k=k)
    return chain.run(input_documents=relevant_docs, question=query)
#keyboard.unhook_all()########################### |