# NOTE(review): removed non-Python scraper residue that preceded the imports
# (a "File size" line, a row of git blame hashes, and a line-number gutter);
# it would have been a SyntaxError at import time.
from langchain.chains import RetrievalQA, ConversationalRetrievalChain
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import DirectoryLoader, TextLoader,PyPDFLoader
from transformers import pipeline, AutoModelForCausalLM
from langchain.llms import HuggingFacePipeline
from langchain.embeddings import HuggingFaceInstructEmbeddings
import gradio as gr
from InstructorEmbedding import INSTRUCTOR
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from transformers import AutoModelForSequenceClassification, AutoTokenizer
# Earlier experiments (distilbert-base-uncased-finetuned-mrpc, google/flan-t5-base
# with a text2text-generation pipeline) were removed; gpt2-medium is the model
# currently in use.
model_id = "gpt2-medium"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

# FIX: the original bound the built pipeline to the name `pipeline`, shadowing
# the imported `transformers.pipeline` factory and making it uncallable for the
# rest of the module. Use a distinct name for the pipeline instance.
text_generation_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=100,  # hard cap on prompt + generated tokens per call
)

# Wrap the HF pipeline so LangChain can drive it as an LLM.
local_llm = HuggingFacePipeline(pipeline=text_generation_pipeline)
# Smoke test (manual): print(local_llm('What is the capital of Syria?'))
# --- Build the retrieval index from the source document --------------------
# Load the PDF, chunk it, embed the chunks, and persist the index in Chroma.
loader = PyPDFLoader('bipolar.pdf')
documents = loader.load()

splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
chunks = splitter.split_documents(documents)

embedding = HuggingFaceInstructEmbeddings()
docsearch = Chroma.from_documents(chunks, embedding, persist_directory='db')

# Retriever returns the 3 most similar chunks for each query.
retriever = docsearch.as_retriever(search_kwargs={"k": 3})
# RetrievalQA wires the retriever and the local LLM together; "map_reduce"
# processes each retrieved chunk separately before composing the final answer.
qa_chain = RetrievalQA.from_chain_type(
    llm=local_llm,
    chain_type="map_reduce",
    retriever=retriever,
    return_source_documents=True,
)
# question = input('prompt: ')
# result = qa_chain({'query': question})
# print('result: ', result['result'])
def gradinterface(query, history):
    """Gradio chat handler: run the QA chain on *query* and return the answer.

    *history* is supplied by gr.ChatInterface but is not used — each query is
    answered independently of the conversation so far.
    """
    response = qa_chain({'query': query})
    return response['result']
# Chat UI backed by the QA chain; one handler call per user message.
demo = gr.ChatInterface(fn=gradinterface, title='OUR_OWN_BOT')
if __name__ == "__main__":
    # share=True publishes a temporary public gradio.live URL.
    demo.launch(share=True)
# NOTE(review): removed trailing "|" scraper artifact (would be a SyntaxError).