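"""Retrieval-augmented QA chatbot over a local PDF.

Loads 'bipolar.pdf', splits it into chunks, indexes the chunks in a
persistent Chroma vector store with Instructor embeddings, and answers
questions with a local Flan-T5 model through a Gradio chat interface.
"""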
from langchain.chains import RetrievalQA
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import TextLoader, PyPDFLoader
from langchain.llms import HuggingFacePipeline
from langchain.embeddings import HuggingFaceInstructEmbeddings
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
import gradio as gr


# Load a small instruction-tuned seq2seq model to generate answers locally.
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base")

# Wrap the model in a text2text-generation pipeline with sampling enabled.
pipe = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=200,
    temperature=0.8,
    top_p=0.95,
    repetition_penalty=1.15,
    do_sample=True,
)

local_llm = HuggingFacePipeline(pipeline=pipe)

# Load the source PDF and split it into 1000-character chunks for retrieval.
loader = PyPDFLoader('bipolar.pdf')
# loader = TextLoader('info.txt')
documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(documents)

# Embed the chunks with Instructor embeddings and persist the index in 'db'.
embedding = HuggingFaceInstructEmbeddings()
docsearch = Chroma.from_documents(texts, embedding, persist_directory='db')

# Retrieve the 3 most similar chunks per query and "stuff" them into the prompt.
retriever = docsearch.as_retriever(search_kwargs={"k": 3})
qa_chain = RetrievalQA.from_chain_type(llm=local_llm,
                                       chain_type="stuff",
                                       retriever=retriever,
                                       return_source_documents=True)

def gradinterface(query, history):
    """Gradio chat callback: answer `query` with the RetrievalQA chain.

    `history` is required by gr.ChatInterface's signature but unused here.
    """
    result = qa_chain({'query': query})
    return result['result']
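# Note: because the chain is built with return_source_documents=True,
# result['source_documents'] also holds the retrieved chunks; these could
# be appended to the reply if source citations are wanted.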


demo = gr.ChatInterface(fn=gradinterface, title='OUR_OWN_BOT')

if __name__ == "__main__":
    demo.launch(share=True)