File size: 4,411 Bytes
8f1972a
 
 
 
1f67093
8f1972a
 
04e8cf9
8f1972a
 
 
 
 
 
 
 
 
4741047
 
 
 
 
04e8cf9
8f1972a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
04e8cf9
8f1972a
 
04e8cf9
8f1972a
 
 
 
 
 
 
 
 
 
55247a9
8f1972a
 
 
 
 
 
 
 
55247a9
8f1972a
 
 
 
 
55247a9
8f1972a
 
 
55247a9
 
8f1972a
55247a9
8f1972a
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# from langchain import PromptTemplate
# from langchain import LLMChain
# from langchain.llms import CTransformers
# import gradio as gr

# B_INST, E_INST = "[INST]", "[/INST]"
# B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"

# # DEFAULT_SYSTEM_PROMPT="\
# # You are a helpful, respectful, and honest assistant designed to improve English language skills. Your name is Nemo\
# # Always provide accurate and helpful responses to language improvement tasks, while ensuring safety and ethical standards. \
# # Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. \
# # Please ensure that your responses are socially unbiased, positive, and focused on enhancing language skills. \
# # If a question does not make sense or is not factually coherent, explain why instead of answering something incorrect. \
# # If you don't know the answer to a question, please don't share false information. \
# # Your role is to guide users through various language exercises and challenges, helping them to practice and improve their English skills in a fun and engaging way. \
# # Always encourage users to try different approaches and provide constructive feedback to help them progress."
# DEFAULT_SYSTEM_PROMPT="\
# You are a helpful, respectful, and honest assistant designed to improve English language skills. Your name is Nemo\
# If you don't know the answer to a question, please don't share false information. \
# Your role is to guide users through various language exercises and challenges, helping them to practice and improve their English skills in a fun and engaging way. \
# Always encourage users to try different approaches and provide constructive feedback to help them progress."

# instruction = "Have a good conversation: \n\n {text}"


# SYSTEM_PROMPT = B_SYS + DEFAULT_SYSTEM_PROMPT + E_SYS

# template = B_INST + SYSTEM_PROMPT + instruction + E_INST

# prompt = PromptTemplate(template=template, input_variables=["text"])

# # llm = CTransformers(model="TheBloke/Llama-2-7B-Chat-GGUF", model_file="llama-2-7b-chat.Q3_K_S.gguf",
# llm = CTransformers(model="NousResearch/Llama-2-7b-chat-hf",
#                     model_type='llama',
#                     config={'max_new_tokens': 128,
#                             'temperature': 0.01}
#                     )

# LLM_Chain = LLMChain(prompt=prompt, llm=llm)

# def greet(prompt):
#     return LLM_Chain.run(prompt)

# iface = gr.Interface(fn=greet, inputs="text", outputs="text")
# iface.launch()










##########################
from langchain.chains import RetrievalQA, ConversationalRetrievalChain
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import DirectoryLoader, TextLoader,PyPDFLoader
from transformers import pipeline, AutoModelForCausalLM
from langchain.llms import HuggingFacePipeline
from langchain.embeddings import HuggingFaceInstructEmbeddings
import gradio as gr
from InstructorEmbedding import INSTRUCTOR
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

from transformers import AutoModelForSequenceClassification, AutoTokenizer


# --- Local LLM backend -------------------------------------------------------
# FLAN-T5 base is a seq2seq model, so it is served through a
# "text2text-generation" pipeline rather than causal generation.
MODEL_NAME = "google/flan-t5-base"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)

# Sampling configuration for the generation pipeline (do_sample=True makes
# temperature/top_p effective; repetition_penalty discourages loops).
_generation_kwargs = {
    "max_length": 200,
    "temperature": 0.8,
    "top_p": 0.95,
    "repetition_penalty": 1.15,
    "do_sample": True,
}

pipe = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    **_generation_kwargs,
)

# --- Retrieval-augmented QA chain --------------------------------------------
# Wrap the HF pipeline so LangChain can drive it as an LLM.
local_llm = HuggingFacePipeline(pipeline=pipe)

# Load the source document and cut it into ~1000-character chunks
# with no overlap between consecutive chunks.
# loader = TextLoader('info.txt')
loader = PyPDFLoader('conv.pdf')
document = loader.load()
text_spliter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_spliter.split_documents(document)

# Embed the chunks with Instructor embeddings and persist the
# Chroma vector index under ./db.
embedding = HuggingFaceInstructEmbeddings()
docsearch = Chroma.from_documents(texts, embedding, persist_directory='db')
retriever = docsearch.as_retriever(search_kwargs={"k": 3})

# "stuff" chain type: the top-3 retrieved chunks are concatenated into a
# single prompt for the LLM; source documents are returned alongside.
qa_chain = RetrievalQA.from_chain_type(
    llm=local_llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)

def gradinterface(query, history):
    """Gradio chat callback: answer *query* through the retrieval QA chain.

    *history* is supplied by ``gr.ChatInterface`` but is intentionally
    unused — every turn is answered independently from the document index.
    Returns the chain's answer text (source documents are discarded).
    """
    response = qa_chain({'query': query})
    return response['result']


# Chat UI wired to the QA callback; `fn` is the first positional argument.
demo = gr.ChatInterface(gradinterface, title='OUR_OWN_BOT')

if __name__ == '__main__':
    # share=True publishes a temporary public Gradio link as well as
    # serving on localhost.
    demo.launch(share=True)