Spaces:

anen
/

DentalGPT

Running

File size: 2,198 Bytes

de1df0d

import os
import keyboard
import time
import requests
import warnings

# The Hugging Face Hub token must be supplied through the environment (for
# example as a deployment secret) and never committed to source control.
# A token that was previously hard-coded on this line has been published with
# the file and should be revoked. An externally provided value is no longer
# overwritten here.
if not os.environ.get("HUGGINGFACEHUB_API_TOKEN"):
    warnings.warn(
        "HUGGINGFACEHUB_API_TOKEN is not set; calls to the Hugging Face Hub "
        "are expected to fail until it is provided via the environment.",
        RuntimeWarning,
    )
#from langchain.vectorstores.weaviate import Weaviate
from langchain.document_loaders import TextLoader  #for textfiles
from langchain.text_splitter import CharacterTextSplitter #text splitter
from langchain.embeddings import HuggingFaceEmbeddings #for using Hugging Face models
# Vectorstore: https://python.langchain.com/en/latest/modules/indexes/vectorstores.html
from langchain.vectorstores import FAISS  # FAISS (Facebook AI Similarity Search) vector store
from langchain.chains.question_answering import load_qa_chain
from langchain import HuggingFaceHub
from langchain.document_loaders import UnstructuredPDFLoader  #load pdf
from langchain.indexes import VectorstoreIndexCreator #vectorize db index with chromadb
from langchain.chains import RetrievalQA
from langchain.document_loaders import UnstructuredURLLoader  #load URLs into a document loader
import requests
import textwrap
from langchain.document_loaders import TextLoader

# Load the knowledge-base text file. The path is relative, so it is resolved
# against the current working directory at import time.
loader = TextLoader('./KS-all-info_rev1.txt')
# `documents` is whatever TextLoader.load() returns for that file; it is split
# into chunks further down in this module.
documents = loader.load()
def wrap_text_preserve_newlines(text, width=110):
    """Wrap *text* to *width* columns while keeping its existing line breaks.

    Each newline-separated line is wrapped on its own with ``textwrap.fill``
    and the results are joined back with newlines, so original line breaks
    (including blank lines) survive the wrapping.

    Args:
        text: The string to wrap.
        width: Maximum line width passed to ``textwrap.fill`` (default 110).

    Returns:
        The wrapped string.
    """
    return '\n'.join(
        textwrap.fill(segment, width=width) for segment in text.split('\n')
    )
# Split the loaded documents into chunks. chunk_size=3000 and chunk_overlap=10
# are presumably measured in characters (CharacterTextSplitter's default
# length function) -- confirm against the installed LangChain version.
text_splitter = CharacterTextSplitter(chunk_size=3000, chunk_overlap=10)
docs = text_splitter.split_documents(documents)
# Embeddings: constructed with no arguments, so the library's default
# embedding model is used.
embeddings = HuggingFaceEmbeddings()
# Create the vectorized db: a FAISS index built from the chunks in `docs`.
# Vectorstore: https://python.langchain.com/en/latest/modules/indexes/vectorstores.html
db = FAISS.from_documents(docs, embeddings)
# LLM accessed through the HuggingFaceHub wrapper; presumably authenticated by
# the HUGGINGFACEHUB_API_TOKEN environment variable. temperature 0 and
# max_length 512 are forwarded to the model as generation settings.
llm=HuggingFaceHub(repo_id="MBZUAI/LaMini-Flan-T5-783M", model_kwargs={"temperature":0, "max_length":512})
# Question-answering chain of type "stuff" (per the LangChain docs, the
# supplied documents are placed together into a single prompt).
chain = load_qa_chain(llm, chain_type="stuff")
def run_chain(query, k=4):
    """Answer *query* using the chunks most similar to it.

    The module-level FAISS index ``db`` is searched for the ``k`` chunks
    closest to the query, and only those chunks are passed to the
    question-answering chain. Previously every chunk of the corpus was passed
    on each call and the index was never consulted.

    Args:
        query: The question text.
        k: Number of chunks to retrieve from the index (default 4).

    Returns:
        Whatever ``chain.run`` returns for the retrieved chunks and question.
    """
    # Retrieve only the relevant passages so the prompt stays small.
    relevant_docs = db.similarity_search(query, k=k)
    result = chain.run(input_documents=relevant_docs, question=query)
    return result


#keyboard.unhook_all()###########################