# Captured from a Hugging Face Space file viewer (Space status at capture: Sleeping).
# File size: 2,480 bytes.
# The viewer's per-line commit-hash gutter (1b25c4c, 71c3cac, b66e441) and its 1-70
# line-number gutter were page chrome, not program text, and are summarized here.
import os
import pickle
from langchain.document_loaders import UnstructuredURLLoader
from langchain.text_splitter import CharacterTextSplitter
from InstructorEmbedding import INSTRUCTOR
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.chains import RetrievalQA
from langchain import HuggingFaceHub
from langchain.utilities import GoogleSerperAPIWrapper
import gradio as gr
import pandas as pd
# Read the list of article links; the first data column of the CSV holds the URLs.
df = pd.read_csv('linkss.csv')

# itertuples() yields (index, col0, col1, ...), so position 1 is the first column.
# Each value is rendered through an f-string so the list always contains strings.
url = [f"{row[1]}" for row in df.itertuples()]
class Chatbot:
    """Question-answering bot that retrieves from a FAISS index and answers with an LLM.

    Construction loads the source documents, obtains a retriever (from the
    pickled index if present, otherwise by building and saving a new one) and
    wires it into a ``RetrievalQA`` chain. Use :meth:`chat` to ask a question.

    Credentials are read from the environment and must NOT be written into
    this file: set ``HUGGINGFACEHUB_API_TOKEN`` (for example as a Space
    secret) before starting the app.
    """

    # Pickled FAISS vector store: written by create_embeddings(), read by load_embeddings().
    INDEX_PATH = "db_instructEmbedd.pkl"

    def __init__(self):
        """Prepare data, retriever and QA chain.

        Raises:
            RuntimeError: if no Hugging Face Hub token is available in the
                environment.
        """
        # Fail early with a clear message instead of deep inside the LLM client.
        self._ensure_hub_token()
        self.load_data()
        try:
            self.load_embeddings()
        except FileNotFoundError:
            # No saved index yet (first run): build it from the loaded documents.
            self.split_documents()
            self.create_embeddings()
        self.create_qa_model()

    @staticmethod
    def _ensure_hub_token():
        """Make sure ``HUGGINGFACEHUB_API_TOKEN`` is set, without hard-coding it.

        ``Hugging_Face_API_KEY`` is accepted as a fallback name because earlier
        revisions of this file exported the token under both names.
        NOTE(review): the keys that used to be embedded here (Hugging Face and
        Serper) were published with the source and should be revoked.
        """
        token = (
            os.environ.get("HUGGINGFACEHUB_API_TOKEN")
            or os.environ.get("Hugging_Face_API_KEY")
        )
        if not token:
            raise RuntimeError(
                "HUGGINGFACEHUB_API_TOKEN is not set; provide it through the "
                "environment (e.g. a Space secret) instead of the source code."
            )
        os.environ["HUGGINGFACEHUB_API_TOKEN"] = token

    def load_data(self):
        """Load every page in the module-level ``url`` list into ``self.data``."""
        urls = url
        loaders = UnstructuredURLLoader(urls=urls)
        self.data = loaders.load()

    def split_documents(self):
        """Split ``self.data`` into newline-separated chunks stored in ``self.docs``."""
        text_splitter = CharacterTextSplitter(separator='\n', chunk_size=500, chunk_overlap=20)
        self.docs = text_splitter.split_documents(self.data)

    def create_embeddings(self):
        """Embed ``self.docs`` into a FAISS store, set the retriever and save the store.

        Requires :meth:`split_documents` to have populated ``self.docs``.
        """
        instructor_embeddings = HuggingFaceInstructEmbeddings(model_name="sembeddings/model_gpt_trained")
        db_instructEmbedd = FAISS.from_documents(self.docs, instructor_embeddings)
        self.retriever = db_instructEmbedd.as_retriever(search_kwargs={"k": 3})
        with open(self.INDEX_PATH, "wb") as f:
            pickle.dump(db_instructEmbedd, f)

    def load_embeddings(self):
        """Load the pickled FAISS store and expose it as ``self.retriever`` (top 3 hits).

        Raises:
            FileNotFoundError: if the index file has not been created yet.
        """
        # SECURITY: pickle.load executes arbitrary code from the file. Only load
        # an index produced by create_embeddings() from a trusted location.
        with open(self.INDEX_PATH, "rb") as f:
            vector_store = pickle.load(f)
        self.retriever = vector_store.as_retriever(search_kwargs={"k": 3})

    def create_qa_model(self):
        """Build the ``RetrievalQA`` chain in ``self.qa`` on top of ``self.retriever``."""
        llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature": 0.1})
        self.qa = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=self.retriever,
            return_source_documents=True,
        )

    def chat(self, query: str) -> str:
        """Answer ``query`` and return only the answer text.

        The chain is invoked with a dict and returns a dict; only its
        ``'result'`` entry is returned to the caller.
        """
        result = self.qa({'query': query})
        return result['result']
# Single shared bot instance, built at import time. Construction runs the data
# loading, index loading and QA-chain setup in Chatbot.__init__, so it may be slow.
chatbot = Chatbot()
def chatbot_response(query):
    """Gradio callback: forward the user's text to the shared bot and return its answer."""
    return chatbot.chat(query)
# Text-in / text-out Gradio interface whose handler is chatbot_response.
iface = gr.Interface(
    fn=chatbot_response,
    inputs="text",
    outputs="text",
    title="Chatbot Trained on Indian Exam Articles",
)

# Start serving the interface.
iface.launch()
|