|
import os
import shutil
import warnings
from datetime import datetime

from dotenv import load_dotenv
from flask import Flask, jsonify, render_template, request
from werkzeug.utils import secure_filename

from langchain.llms import OpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.document_loaders import SeleniumURLLoader, PyPDFLoader, TextLoader
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.memory import ConversationBufferWindowMemory
from langchain import PromptTemplate

# Load environment variables (e.g. the OpenAI API key) from a local .env file.
load_dotenv()

# Silence library deprecation chatter in the console logs.
warnings.filterwarnings("ignore")
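
# The LangChain OpenAI wrappers used below read the API key from the
# environment, so a minimal .env for local runs might look like this
# (illustrative placeholder, not a real key):
#
#   OPENAI_API_KEY=sk-...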

app = Flask(__name__, template_folder="./")

# Directory where uploaded knowledge-base files are stored.
uploads_dir = os.path.join(app.root_path, 'static', 'searchUploads')
os.makedirs(uploads_dir, exist_ok=True)


def pretty_print_docs(docs):
    """Print each chunk's length, source, and content, separated by rule lines."""
    print(f"\n{'-' * 100}\n".join(
        f"Document {i + 1}:\n\nDocument Length>>>{len(d.page_content)}"
        f"\n\nDocument Source>>> {d.metadata['source']}"
        f"\n\nContent>>> {d.page_content}"
        for i, d in enumerate(docs)))


def getEmbeddingModel(embeddingId):
    # embeddingId is accepted for future model selection but is currently
    # ignored; OpenAI embeddings are always returned.
    return OpenAIEmbeddings()


def getLLMModel(LLMID):
    # LLMID is likewise ignored; a deterministic (temperature 0.0) OpenAI
    # completion model is always returned.
    llm = OpenAI(temperature=0.0)
    return llm


def clearKBUploadDirectory(uploads_dir):
    """Delete every file, symlink, and subdirectory inside uploads_dir."""
    for filename in os.listdir(uploads_dir):
        file_path = os.path.join(uploads_dir, filename)
        print("Clearing Doc Directory. Trying to delete " + file_path)
        try:
            if os.path.isfile(file_path) or os.path.islink(file_path):
                os.unlink(file_path)
            elif os.path.isdir(file_path):
                shutil.rmtree(file_path)
        except Exception as e:
            print('Failed to delete %s. Reason: %s' % (file_path, e))


def loadKB(fileprovided, urlProvided, uploads_dir, request):
    """Assemble documents from uploaded PDFs (or the bundled default file) plus any web URLs."""
    documents = []
    if fileprovided:
        # Replace any previously uploaded knowledge base before saving the new files.
        clearKBUploadDirectory(uploads_dir)
        for file in request.files.getlist('files[]'):
            print("File Received>>>" + file.filename)
            filepath = os.path.join(uploads_dir, secure_filename(file.filename))
            file.save(filepath)
            loader = PyPDFLoader(filepath)
            documents.extend(loader.load())
    else:
        loader = TextLoader('Jio.txt')
        documents.extend(loader.load())

    if urlProvided:
        weburl = request.form.getlist('weburl')
        print(weburl)
        # Multiple URLs arrive as a single semicolon-separated string.
        urlList = weburl[0].split(';')
        print(urlList)
        print("Selenium Started", datetime.now().strftime("%H:%M:%S"))
        urlLoader = SeleniumURLLoader(urlList)
        print("Selenium Completed", datetime.now().strftime("%H:%M:%S"))
        documents.extend(urlLoader.load())
        print("inside selenium loader:")
        print(documents)

    return documents


def getRAGChain(customerName, customerDistrict, custDetailsPresent, vectordb):
    chain = RetrievalQA.from_chain_type(
        llm=getLLMModel(0),
        chain_type='stuff',
        retriever=vectordb.as_retriever(),
        verbose=False,
        chain_type_kwargs={
            "verbose": False,
            "prompt": createPrompt(customerName, customerDistrict, custDetailsPresent),
            # Keep only the last 3 exchanges as conversational history.
            "memory": ConversationBufferWindowMemory(
                k=3,
                memory_key="history",
                input_key="question"),
        }
    )
    return chain
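
# getRAGChain is not wired to a route in this file; a call might look like the
# sketch below (the customer name, district, and question are illustrative
# placeholders, not values used anywhere in the app):
#
#   chain = getRAGChain("Ravi", "Mumbai", True, vectordb)
#   answer = chain.run("What broadband plans are available in my city?")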


def createVectorDB(documents):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150)
    texts = text_splitter.split_documents(documents)
    print("All chunk List START ***********************\n\n")
    pretty_print_docs(texts)
    print("All chunk List END ***********************\n\n")
    embeddings = getEmbeddingModel(0)
    vectordb = Chroma.from_documents(texts, embeddings)
    return vectordb


def createPrompt(cName, cCity, custDetailsPresent):
    # custDetailsPresent is currently unused.
    cProfile = "Customer's name is " + cName + "\nCustomer's place of residence (city/state) is " + cCity + "\n"
    print(cProfile)

    template1 = """Your role is that of a professional Customer Support Executive and your name is Jio AIAssist.
You are talking to the customer whose information is provided below in the block delimited by <cp></cp>.
Use the following customer-related information (delimited by <cp></cp>) and context (delimited by <ctx></ctx>) to answer the question at the end, thinking step by step and showing your reasoning.
If you don't know the answer, just say that you don't know; don't try to make up an answer.
Use the customer information to replace entities in the question before answering.
\n"""

    template2 = """
<ctx>
{context}
</ctx>
<hs>
{history}
</hs>
Question: {question}
Answer: """

    prompt_template = template1 + "<cp>\n" + cProfile + "\n</cp>\n" + template2
    PROMPT = PromptTemplate(template=prompt_template, input_variables=["history", "context", "question"])
    return PROMPT


# Bootstrap the vector store from the default knowledge base (Jio.txt) at startup.
vectordb = createVectorDB(loadKB(False, False, uploads_dir, None))


@app.route('/', methods=['GET'])
def test():
    return "Docker hello"


@app.route('/KBUploader')
def KBUpload():
    return render_template("KBTrain.html")


@app.route('/aiassist')
def aiassist():
    return render_template("index.html")


@app.route('/aiSearch')
def html():
    return render_template("AISearch.html")


@app.route('/searchKB')
def searchKB():
    # Renamed from the duplicate "KBUpload": Flask refuses to register two
    # view functions under the same endpoint name.
    return render_template("SearchKB.html")


@app.route('/agent/chat/suggestion', methods=['POST'])
def process_json():
    print(f"\n{'*' * 100}\n")
    print("Request Received >>>>>>>>>>>>>>>>>>", datetime.now().strftime("%H:%M:%S"))
    content_type = request.headers.get('Content-Type')
    if content_type == 'application/json':
        requestQuery = request.get_json()

        # Each result is a (document, similarity_score) pair.
        relevantDoc = vectordb.similarity_search_with_score(requestQuery['query'], distance_metric="cos", k=3)
        searchResultArray = []
        for document, score in relevantDoc:
            searchResult = {}
            print(f"\n{'-' * 100}\n")
            searchResult['documentSource'] = document.metadata['source']
            searchResult['pageContent'] = document.page_content
            searchResult['similarityScore'] = str(score)
            print("Document Source>>>>>> " + searchResult['documentSource'] + "\n\n")
            print("Page Content>>>>>> " + searchResult['pageContent'] + "\n\n")
            print("Similarity Score>>>> " + searchResult['similarityScore'])
            print(f"\n{'-' * 100}\n")
            searchResultArray.append(searchResult)
        print(f"\n{'*' * 100}\n")
        return jsonify(botMessage=searchResultArray)
    else:
        return 'Content-Type not supported!'
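
# An example request to this endpoint might look like the following
# (illustrative query; port 7860 is the app's default):
#
#   curl -X POST http://localhost:7860/agent/chat/suggestion \
#        -H "Content-Type: application/json" \
#        -d '{"query": "What is the refund policy?"}'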


@app.route('/file_upload', methods=['POST'])
def file_Upload():
    fileprovided = not request.files.getlist('files[]')[0].filename == ''
    urlProvided = not request.form.getlist('weburl')[0] == ''
    print("*******")
    print("File Provided:" + str(fileprovided))
    print("URL Provided:" + str(urlProvided))
    print("*******")

    documents = []
    if fileprovided:
        # Replace any previously uploaded knowledge base before saving the new files.
        clearKBUploadDirectory(uploads_dir)
        for file in request.files.getlist('files[]'):
            print("File Received>>>" + file.filename)
            filepath = os.path.join(uploads_dir, secure_filename(file.filename))
            file.save(filepath)
            loader = PyPDFLoader(filepath)
            documents.extend(loader.load())
    if urlProvided:
        weburl = request.form.getlist('weburl')
        print(weburl)
        urlList = weburl[0].split(';')
        print(urlList)
        print("Selenium Started", datetime.now().strftime("%H:%M:%S"))
        urlLoader = SeleniumURLLoader(urlList)
        print("Selenium Completed", datetime.now().strftime("%H:%M:%S"))
        documents.extend(urlLoader.load())

    print(uploads_dir)

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=150)
    texts = text_splitter.split_documents(documents)

    print("All chunk List START ***********************\n\n")
    pretty_print_docs(texts)
    print("All chunk List END ***********************\n\n")

    # Local sentence-transformer embeddings; imported lazily so the model is
    # only loaded when a knowledge base is actually uploaded.
    from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
    embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

    # Rebuild the module-level vector store with a cosine-distance index.
    global vectordb
    vectordb = Chroma.from_documents(documents=texts, embedding=embeddings,
                                     collection_metadata={"hnsw:space": "cosine"})
    return render_template("AISearch.html")


if __name__ == '__main__':
    # Default to port 7860 unless PORT is set in the environment.
    app.run(host='0.0.0.0', port=int(os.environ.get('PORT', 7860)))
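
# The "Docker hello" health route suggests containerized deployment; a run
# command might look like this (the image name is a placeholder):
#
#   docker run -p 7860:7860 -e PORT=7860 jio-aiassist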