# --- Standard library -------------------------------------------------------
import json
import os
import shutil
import warnings
from datetime import datetime

# --- Environment bootstrap --------------------------------------------------
# Load .env before the remaining third-party imports (as the original file
# did) so that any library reading environment variables at import time sees
# the values defined there.
from dotenv import load_dotenv

load_dotenv()

# --- Third party ------------------------------------------------------------
import nltk
import openai
import requests
from flask import Flask, jsonify, render_template, request
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.chains import VectorDBQA
from langchain.document_loaders import SeleniumURLLoader, PyPDFLoader
from langchain.document_loaders import UnstructuredFileLoader, TextLoader
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.memory import ConversationBufferWindowMemory
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from werkzeug.datastructures import FileStorage
from werkzeug.utils import secure_filename

# NOTE(review): the original carried a commented-out, truncated line that
# presumably read `nltk.download("punkt")` - confirm whether the tokenizer
# data has to be downloaded at start-up.

# The key must be read AFTER load_dotenv(); reading it first (as the original
# did) yields None whenever OPENAI_API_KEY is defined only in the .env file.
openai.api_key = os.getenv("OPENAI_API_KEY")

warnings.filterwarnings("ignore")

# Templates are resolved relative to the application directory itself.
app = Flask(__name__, template_folder="./")

# Create a directory in a known location to save files to.
uploads_dir = os.path.join(app.root_path, 'static', 'uploads')
os.makedirs(uploads_dir, exist_ok=True)
def pretty_print_docs(docs):
    """Print each document's length, source and content.

    Documents are numbered from 1 and separated by a rule of 100 dashes.
    Each item must expose ``page_content`` (a string) and ``metadata`` (a
    mapping containing a string ``'source'`` entry).
    """
    rule = f"\n{'-' * 100}\n"
    rendered = []
    for position, doc in enumerate(docs, start=1):
        heading = f"Document {position}:\n\n"
        length_part = "Document Length>>>" + str(len(doc.page_content))
        source_part = "\n\nDocument Source>>> " + doc.metadata['source']
        content_part = "\n\nContent>>> " + doc.page_content
        rendered.append(heading + length_part + source_part + content_part)
    print(rule.join(rendered))
def getEmbeddingModel(embeddingId):
    """Return the embedding backend selected by ``embeddingId``.

    Mapping:
        1 -> SentenceTransformerEmbeddings("all-MiniLM-L6-v2")
        2 -> HuggingFaceInstructEmbeddings("hkunlp/instructor-large"), CPU
        3 -> HuggingFaceBgeEmbeddings("BAAI/bge-large-en-v1.5"), CUDA
        anything else -> OpenAIEmbeddings()

    The original tested ``embeddingId == 2`` twice (making the BGE branch
    unreachable), stored the BGE model in an unused variable, and returned a
    fresh ``OpenAIEmbeddings()`` regardless of the selection. The selected
    model is now the one returned.
    """
    if embeddingId == 1:
        embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
    elif embeddingId == 2:
        embeddings = HuggingFaceInstructEmbeddings(
            model_name="hkunlp/instructor-large",
            model_kwargs={'device': 'cpu'},
            encode_kwargs={'normalize_embeddings': True},
        )
    elif embeddingId == 3:
        embeddings = HuggingFaceBgeEmbeddings(
            model_name="BAAI/bge-large-en-v1.5",
            model_kwargs={'device': 'cuda'},
            # set True to compute cosine similarity
            encode_kwargs={'normalize_embeddings': True},
        )
    else:
        embeddings = OpenAIEmbeddings()
    return embeddings
def getLLMModel(LLMID):
    """Build the completion model used for answering.

    ``LLMID`` is accepted but not consulted: an ``OpenAI`` LLM with
    ``temperature=0.0`` is always returned.
    """
    return OpenAI(temperature=0.0)
def clearKBUploadDirectory(uploads_dir):
    """Empty ``uploads_dir`` without removing the directory itself.

    Regular files and symlinks are unlinked, sub-directories are removed
    recursively. A failure on one entry is printed and the remaining entries
    are still processed (best-effort cleanup).
    """
    for entry_name in os.listdir(uploads_dir):
        target = os.path.join(uploads_dir, entry_name)
        print("Clearing Doc Directory. Trying to delete" + target)
        try:
            removable_as_file = os.path.isfile(target) or os.path.islink(target)
            if removable_as_file:
                os.unlink(target)
                continue
            if os.path.isdir(target):
                shutil.rmtree(target)
        except Exception as err:
            print('Failed to delete %s. Reason: %s' % (target, err))
def loadKB(fileprovided, urlProvided, uploads_dir, request):
    """Collect the documents that make up the knowledge base.

    Args:
        fileprovided: when true, the upload directory is cleared and every
            file in ``request.files['files[]']`` is saved there and parsed
            with ``PyPDFLoader``. When false, the bundled ``Jio.txt`` is
            loaded instead.
        urlProvided: when true, the first ``weburl`` form value is split on
            ``;`` and the resulting URLs are fetched with
            ``SeleniumURLLoader``.
        uploads_dir: directory in which uploaded files are stored.
        request: the Flask request; only accessed when one of the two flags
            is true, so ``None`` is acceptable otherwise.

    Returns:
        A list with all loaded documents.
    """
    documents = []
    if fileprovided:
        # Delete Files
        clearKBUploadDirectory(uploads_dir)
        # Read and Embed New Files provided
        for file in request.files.getlist('files[]'):
            if not file.filename:
                # Empty file inputs have no name and nothing to store.
                continue
            print("File Received>>>" + file.filename)
            # The save call was truncated in the source; it is restored here
            # because PyPDFLoader below reads the file from this exact path.
            saved_path = os.path.join(uploads_dir, secure_filename(file.filename))
            file.save(saved_path)
            loader = PyPDFLoader(saved_path)
            documents.extend(loader.load())
    else:
        loader = TextLoader('Jio.txt')
        documents.extend(loader.load())
    if urlProvided:
        weburl = request.form.getlist('weburl')
        print(weburl)
        raw_urls = weburl[0] if weburl else ""
        # Drop blank fragments (e.g. from a trailing ';') so that no empty
        # URL is handed to Selenium.
        urlList = [url.strip() for url in raw_urls.split(';') if url.strip()]
        print(urlList)
        if urlList:
            # NOTE(review): the timestamp expression was truncated in the
            # source; datetime.now().strftime(...) is the presumed original.
            print("Selenium Started", datetime.now().strftime("%H:%M:%S"))
            # urlLoader=RecursiveUrlLoader(urlList[0])
            urlLoader = SeleniumURLLoader(urlList)
            print("Selenium Completed", datetime.now().strftime("%H:%M:%S"))
            documents.extend(urlLoader.load())
        print("inside selenium loader:")
        print(documents)
    return documents
def getRAGChain(customerName,customerDistrict, custDetailsPresent,vectordb):
    """Assemble the retrieval QA chain for one customer.

    The chain uses the 'stuff' chain type over ``vectordb``'s retriever, the
    prompt produced by ``createPrompt`` for this customer, and a
    ``ConversationBufferWindowMemory`` with ``k=3`` that is exposed to the
    prompt as ``history`` and keyed on the ``question`` input.
    """
    llm = getLLMModel(0)
    retriever = vectordb.as_retriever()
    customer_prompt = createPrompt(customerName, customerDistrict, custDetailsPresent)
    window_memory = ConversationBufferWindowMemory(
        k=3,
        memory_key="history",
        input_key="question",
    )
    combine_kwargs = {
        "verbose": False,
        "prompt": customer_prompt,
        "memory": window_memory,
    }
    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type='stuff',
        retriever=retriever,
        verbose=False,
        chain_type_kwargs=combine_kwargs,
    )
def createVectorDB(documents):
    """Split ``documents`` into chunks and index them in a Chroma store.

    Chunks are produced with a 1500-character size and 150-character overlap,
    dumped to stdout for inspection, then embedded with the model returned
    by ``getEmbeddingModel(0)``.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150)
    chunks = splitter.split_documents(documents)

    print("All chunk List START ***********************\n\n")
    pretty_print_docs(chunks)
    print("All chunk List END ***********************\n\n")

    embedding_model = getEmbeddingModel(0)
    return Chroma.from_documents(chunks, embedding_model)
def createPrompt(cName, cCity, custDetailsPresent):
    """Build the QA prompt with the customer's profile embedded in it.

    Args:
        cName: customer name, inserted into the ``<cp>`` block.
        cCity: customer location, inserted into the ``<cp>`` block.
        custDetailsPresent: accepted for interface compatibility; not used.

    Returns:
        A ``PromptTemplate`` with the input variables ``history``,
        ``context`` and ``question``.
    """
    cProfile = "Customer's Name is " + cName + "\nCustomer's lives in or customer's Resident State or Customer's place is " + cCity + "\n"
    print(cProfile)
    # The profile comes from the request body and becomes part of an
    # f-string style template: a literal '{' or '}' in it would be parsed as
    # a template placeholder. Doubling the braces keeps them literal.
    safe_profile = cProfile.replace("{", "{{").replace("}", "}}")
    template1 = """You role is of a Professional Customer Support Executive and your name is Jio AIAssist.
You are talking to the below customer whose information is provided in block delimited by <cp></cp>.
Use the following customer related information (delimited by <cp></cp>) and context (delimited by <ctx></ctx>) to answer the question at the end by thinking step by step alongwith reaonsing steps:
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use the customer information to replace entities in the question before answering\n
\n"""
    template2 = """
<ctx>
{context}
</ctx>
<hs>
{history}
</hs>
Question: {question}
Answer: """
    prompt_template = template1 + "<cp>\n" + safe_profile + "\n</cp>\n" + template2
    PROMPT = PromptTemplate(template=prompt_template, input_variables=["history", "context", "question"])
    return PROMPT
# Build the default knowledge base at import time: no uploaded files and no
# URLs, so loadKB falls back to its bundled text file and needs no request.
vectordb = createVectorDB(
    loadKB(
        fileprovided=False,
        urlProvided=False,
        uploads_dir=uploads_dir,
        request=None,
    )
)
def test():
    """Return a fixed greeting string.

    NOTE(review): no route decorator is visible on this function in the
    source - confirm how it is registered with the Flask app.
    """
    greeting = "Docker hello"
    return greeting
def KBUpload():
    """Render the ``KBTrain.html`` template.

    NOTE(review): no route decorator is visible on this function in the
    source - confirm how it is registered with the Flask app.
    """
    page = render_template("KBTrain.html")
    return page
def aiassist():
    """Render the ``index.html`` template.

    NOTE(review): no route decorator is visible on this function in the
    source - confirm how it is registered with the Flask app.
    """
    page = render_template("index.html")
    return page
def process_json():
    """Answer every query in a JSON request through the RAG chain.

    The JSON body must contain ``message`` (an iterable of query strings) and
    may contain ``custDetails`` with ``cName`` and ``cDistrict``.

    Returns:
        ``jsonify(suggestions=[{"message": ..., "id": ...}, ...])`` with one
        entry per query, or a plain-text error with HTTP status 415 when the
        request body is not JSON.

    NOTE(review): no route decorator is visible on this function in the
    source - confirm how it is registered with the Flask app.
    """
    print("RQST")
    print(f"\n{'*' * 100}\n")
    # NOTE(review): the timestamp expression was truncated in the source;
    # datetime.now().strftime(...) is the presumed original.
    print("Request Received >>>>>>>>>>>>>>>>>>", datetime.now().strftime("%H:%M:%S"))

    # request.is_json also accepts "application/json; charset=utf-8", which
    # the original strict string comparison rejected.
    if not request.is_json:
        return 'Content-Type not supported!', 415

    requestQuery = request.get_json()
    print(type(requestQuery))

    custDetailsPresent = False
    customerName = ""
    customerDistrict = ""
    if "custDetails" in requestQuery:
        custDetailsPresent = True
        customerName = requestQuery['custDetails']['cName']
        customerDistrict = requestQuery['custDetails']['cDistrict']

    print("chain initiation")
    # The chain (and its conversation memory) is created per request, so the
    # history only spans the queries contained in this one request.
    chainRAG = getRAGChain(customerName, customerDistrict, custDetailsPresent, vectordb)
    print("chain created")

    suggestionArray = []
    for index, query in enumerate(requestQuery['message']):
        # Diagnostic dump of the retrieved chunks; each hit is a
        # (document, score) pair.
        for document, score in vectordb.similarity_search_with_score(query):
            print(f"\n{'-' * 100}\n")
            print("Document Source>>>>>> " + document.metadata['source'] + "\n\n")
            print("Page Content>>>>>> " + document.page_content + "\n\n")
            print("Similarity Score>>>> " + str(score))
            print(f"\n{'-' * 100}\n")
        # NOTE(review): the call target was truncated in the source (only
        # `{"query": query})` survived); running the chain created above is
        # the presumed original - confirm.
        message = chainRAG.run({"query": query})
        print("query:", query)
        print("Response:", message)
        if "I don't know" in message:
            message = "Dear Sir/ Ma'am, Could you please ask questions relevant to Jio?"
        suggestionArray.append({"message": message, "id": index})
    return jsonify(suggestions=suggestionArray)
def file_Upload():
    """Rebuild the knowledge base from uploaded files and/or URLs.

    Reads the ``files[]`` uploads and the ``weburl`` form field, loads the
    corresponding documents, replaces the module-level ``vectordb`` with a
    store built from them, and renders ``index.html``.

    NOTE(review): no route decorator is visible on this function in the
    source - confirm how it is registered with the Flask app.
    """
    # Without this declaration the assignment below only bound a local name
    # and queries kept using the previous knowledge base.
    global vectordb

    uploaded_files = request.files.getlist('files[]')
    url_values = request.form.getlist('weburl')
    # Guard against missing fields: indexing [0] on an empty list raised
    # IndexError in the original.
    fileprovided = bool(uploaded_files) and uploaded_files[0].filename != ''
    urlProvided = bool(url_values) and url_values[0] != ''

    print("*******")
    print("File Provided:" + str(fileprovided))
    print("URL Provided:" + str(urlProvided))
    print("*******")
    print(uploads_dir)

    documents = loadKB(fileprovided, urlProvided, uploads_dir, request)
    vectordb = createVectorDB(documents)
    return render_template("index.html")
if __name__ == '__main__':
    # NOTE(review): the `app.run(` prefix and the host literal were truncated
    # in the source. Binding to all interfaces is presumed here (typical for
    # a containerised service) - confirm the intended host before deploying.
    # The port comes from the PORT environment variable, defaulting to 7860.
    app.run(host='0.0.0.0', port=int(os.environ.get('PORT', 7860)))