|
from langchain.chains import RetrievalQA, ConversationalRetrievalChain |
|
from langchain.vectorstores import Chroma |
|
from langchain.text_splitter import CharacterTextSplitter |
|
from langchain.document_loaders import DirectoryLoader, TextLoader,PyPDFLoader |
|
from transformers import pipeline, AutoModelForCausalLM |
|
from langchain.llms import HuggingFacePipeline |
|
from langchain.embeddings import HuggingFaceInstructEmbeddings |
|
import gradio as gr |
|
from InstructorEmbedding import INSTRUCTOR |
|
import torch |
|
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM |
|
|
|
from transformers import AutoModelForSequenceClassification, AutoTokenizer |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# --- Language model setup ---------------------------------------------------
# Load a small local causal LM to act as the generator for the QA chain.
model_id = "gpt2-medium"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

# BUG FIX: the original assigned the result to the name `pipeline`, shadowing
# the imported `transformers.pipeline` factory function and making it
# uncallable for the rest of the module. Use a distinct variable name.
hf_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    # NOTE(review): max_length counts prompt + generated tokens; 100 is very
    # tight for retrieval-augmented prompts (GPT-2 context is 1024) — confirm
    # this limit is intentional.
    max_length=100,
)

# Wrap the HF pipeline so LangChain can drive it as an LLM.
local_llm = HuggingFacePipeline(pipeline=hf_pipeline)
|
|
|
# --- Document ingestion and retrieval chain ---------------------------------
# Load the source PDF, split it into chunks, embed the chunks, and build a
# retrieval-augmented QA chain over the resulting Chroma vector store.
loader = PyPDFLoader('bipolar.pdf')
documents = loader.load()  # returns a list of Documents (one per page)

# Fixed typo: `text_spliter` -> `text_splitter`.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(documents)

# Instructor embeddings; downloads the default instructor model on first use.
embedding = HuggingFaceInstructEmbeddings()
# Persist the vector store under ./db so embeddings can be reused across runs.
docsearch = Chroma.from_documents(texts, embedding, persist_directory='db')

# Retrieve the 3 most similar chunks per query.
retriever = docsearch.as_retriever(search_kwargs={"k": 3})

# NOTE(review): "map_reduce" issues one LLM call per retrieved chunk plus a
# combine call — heavy for a 100-token GPT-2 pipeline; confirm "stuff" was not
# intended instead.
qa_chain = RetrievalQA.from_chain_type(
    llm=local_llm,
    chain_type="map_reduce",
    retriever=retriever,
    return_source_documents=True,
)
|
|
|
|
|
|
|
def gradinterface(query, history):
    """Gradio ChatInterface callback: answer *query* via the RetrievalQA chain.

    The *history* argument is required by the gr.ChatInterface signature but
    is unused here — each query is answered statelessly by the chain.
    """
    response = qa_chain({'query': query})
    return response['result']
|
|
|
|
|
# Build the chat UI; Gradio calls gradinterface with (message, history).
demo = gr.ChatInterface(
    title='OUR_OWN_BOT',
    fn=gradinterface,
)

if __name__ == "__main__":
    # share=True exposes a temporary public Gradio link in addition to
    # the local server.
    demo.launch(share=True)
|
|