import json
import os
import shutil
import warnings
from datetime import datetime

import nltk
import openai
import requests
from dotenv import load_dotenv
from flask import Flask, jsonify, render_template, request
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.chains import VectorDBQA
from langchain.document_loaders import SeleniumURLLoader, PyPDFLoader
from langchain.document_loaders import UnstructuredFileLoader, TextLoader
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.memory import ConversationBufferWindowMemory
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from werkzeug.datastructures import FileStorage
from werkzeug.utils import secure_filename

# Populate os.environ from a local .env file *before* the API key is read;
# reading the key first would miss a value that is only defined in .env.
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")

# Suppress every Python warning for the whole process.
warnings.filterwarnings("ignore")

# Templates are looked up via template_folder="./" rather than the default
# "templates" sub-directory.
app = Flask(__name__, template_folder="./")

# Create a directory in a known location to save files to.
uploads_dir = os.path.join(app.root_path, 'static', 'uploads')
os.makedirs(uploads_dir, exist_ok=True)


def pretty_print_docs(docs):
    """Print length, source and content of every document in *docs*.

    Entries are separated by a 100-character dashed rule. A document without
    a ``source`` metadata key is reported as ``unknown`` instead of raising.
    """
    separator = f"\n{'-' * 100}\n"
    print(separator.join(
        f"Document {i + 1}:\n\n"
        f"Document Length>>>{len(d.page_content)}\n\n"
        f"Document Source>>> {d.metadata.get('source', 'unknown')}\n\n"
        f"Content>>> {d.page_content}"
        for i, d in enumerate(docs)
    ))


def getEmbeddingModel(embeddingId):
    """Return the embedding model selected by *embeddingId*.

    * ``1`` - SentenceTransformer ``all-MiniLM-L6-v2``
    * ``2`` - ``hkunlp/instructor-large`` on CPU
    * ``3`` - ``BAAI/bge-large-en-v1.5`` on CUDA
    * anything else - ``OpenAIEmbeddings``

    NOTE(review): the BGE branch was previously guarded by a second
    ``embeddingId == 2`` test and was unreachable; ``3`` is presumably the
    intended id - confirm.
    """
    if embeddingId == 1:
        return SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
    if embeddingId == 2:
        return HuggingFaceInstructEmbeddings(
            model_name="hkunlp/instructor-large",
            model_kwargs={'device': 'cpu'},
            encode_kwargs={'normalize_embeddings': True},
        )
    if embeddingId == 3:
        return HuggingFaceBgeEmbeddings(
            model_name="BAAI/bge-large-en-v1.5",
            model_kwargs={'device': 'cuda'},
            # Normalised vectors so similarity is cosine similarity.
            encode_kwargs={'normalize_embeddings': True},
        )
    return OpenAIEmbeddings()


def getLLMModel(LLMID):
    """Return the LLM used for answering.

    *LLMID* is accepted but currently ignored: an ``OpenAI`` completion model
    with ``temperature=0.0`` is always returned.
    """
    llm = OpenAI(temperature=0.0)
    return llm


def clearKBUploadDirectory(uploads_dir):
    """Delete every file, symlink and sub-directory inside *uploads_dir*.

    Deletion is best-effort: a failure on one entry is printed and the
    remaining entries are still processed.
    """
    for filename in os.listdir(uploads_dir):
        file_path = os.path.join(uploads_dir, filename)
        print("Clearing Doc Directory. Trying to delete" + file_path)
        try:
            if os.path.isfile(file_path) or os.path.islink(file_path):
                os.unlink(file_path)
            elif os.path.isdir(file_path):
                shutil.rmtree(file_path)
        except Exception as e:
            print('Failed to delete %s. Reason: %s' % (file_path, e))


def loadKB(fileprovided, urlProvided, uploads_dir, request):
    """Load the knowledge-base documents.

    Args:
        fileprovided: when true, the upload directory is cleared and every
            file in the ``files[]`` form field is saved there and parsed with
            ``PyPDFLoader``; when false, ``Jio.txt`` is loaded instead.
        urlProvided: when true, the first ``weburl`` form value is split on
            ``;`` and the resulting URLs are loaded with ``SeleniumURLLoader``.
        uploads_dir: directory uploaded files are written to.
        request: the Flask request carrying the form data; may be ``None``
            when both flags are false.

    Returns:
        The list of loaded documents.
    """
    documents = []
    if fileprovided:
        # Replace, rather than extend, the previously uploaded files.
        clearKBUploadDirectory(uploads_dir)
        for file in request.files.getlist('files[]'):
            print(f"File Received>>>{file.filename}")
            safe_name = secure_filename(file.filename or '')
            if not safe_name:
                # Nothing usable is left after sanitising the client name.
                continue
            saved_path = os.path.join(uploads_dir, safe_name)
            # NOTE(review): the save call was garbled in the original;
            # reconstructed as saving to uploads_dir/<secure name> - confirm.
            file.save(saved_path)
            loader = PyPDFLoader(saved_path)
            documents.extend(loader.load())
    else:
        loader = TextLoader('Jio.txt')
        documents.extend(loader.load())

    if urlProvided:
        weburl = request.form.getlist('weburl')
        print(weburl)
        # Ignore blank segments such as the one after a trailing ';'.
        urlList = [url.strip() for url in weburl[0].split(';') if url.strip()]
        print(urlList)
        # NOTE(review): the timestamp expressions in the two prints below
        # were garbled; reconstructed as datetime.now().strftime(...) - confirm.
        print("Selenium Started", datetime.now().strftime("%H:%M:%S"))
        urlLoader = SeleniumURLLoader(urlList)
        print("Selenium Completed", datetime.now().strftime("%H:%M:%S"))
        documents.extend(urlLoader.load())
        print("inside selenium loader:")
        print(documents)
    return documents


def getRAGChain(customerName, customerDistrict, custDetailsPresent, vectordb):
    """Build a ``stuff`` RetrievalQA chain over *vectordb*.

    The chain uses the customer-specific prompt from ``createPrompt`` and a
    fresh ``ConversationBufferWindowMemory`` with ``k=3``. Because the memory
    is created here, history only persists for the lifetime of the returned
    chain object.
    """
    chain = RetrievalQA.from_chain_type(
        llm=getLLMModel(0),
        chain_type='stuff',
        retriever=vectordb.as_retriever(),
        verbose=False,
        chain_type_kwargs={
            "verbose": False,
            "prompt": createPrompt(customerName, customerDistrict, custDetailsPresent),
            "memory": ConversationBufferWindowMemory(
                k=3,
                memory_key="history",
                input_key="question"),
        }
    )
    return chain


def createVectorDB(documents):
    """Split *documents* into chunks and index them in a Chroma store.

    Chunks are 1500 characters with 150 characters of overlap. All chunks are
    printed for debugging. Embeddings come from ``getEmbeddingModel(0)``.
    """
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150)
    texts = text_splitter.split_documents(documents)
    print("All chunk List START ***********************\n\n")
    pretty_print_docs(texts)
    print("All chunk List END ***********************\n\n")
    embeddings = getEmbeddingModel(0)
    vectordb = Chroma.from_documents(texts, embeddings)
    return vectordb


def _escape_braces(text):
    """Escape ``{`` and ``}`` so *text* is literal inside an f-string template."""
    return str(text).replace("{", "{{").replace("}", "}}")


def createPrompt(cName, cCity, custDetailsPresent):
    """Build the prompt template for the RetrievalQA chain.

    Args:
        cName: customer name embedded into the prompt.
        cCity: customer location embedded into the prompt.
        custDetailsPresent: accepted for interface compatibility; not used.

    Returns:
        A ``PromptTemplate`` with input variables ``history``, ``context``
        and ``question``.
    """
    cProfile = (
        f"Customer's Name is {cName}"
        f"\nCustomer's lives in or customer's Resident State or Customer's place is {cCity}\n"
    )
    print(cProfile)

    # NOTE(review): the delimiter tags named in the instructions were missing
    # from the original text ("delimited by ."). <cp>, <ctx> and <hs> are
    # reconstructed names - confirm against the intended prompt.
    template1 = (
        "You role is of a Professional Customer Support Executive and your name is Jio AIAssist. "
        "You are talking to the below customer whose information is provided in block "
        "delimited by <cp></cp>. "
        "Use the following customer related information (delimited by <cp></cp>) and context "
        "(delimited by <ctx></ctx>) to answer the question at the end by thinking step by step "
        "alongwith reaonsing steps: "
        "If you don't know the answer, just say that you don't know, don't try to make up an answer. "
        "Use the customer information to replace entities in the question before answering\n"
        "<cp>\n"
    )
    template2 = (
        "</cp>\n"
        "<ctx>\n"
        "{context}\n"
        "</ctx>\n"
        "<hs>\n"
        "{history}\n"
        "</hs>\n"
        "Question: {question}\n"
        "Answer:\n"
    )
    # Customer values come from the request body; escape braces so they cannot
    # be interpreted as template variables.
    prompt_template = template1 + "\n" + _escape_braces(cProfile) + "\n\n" + template2
    PROMPT = PromptTemplate(template=prompt_template,
                            input_variables=["history", "context", "question"])
    return PROMPT


# Default knowledge base, built at import time from Jio.txt.
vectordb = createVectorDB(loadKB(False, False, uploads_dir, None))


@app.route('/', methods=['GET'])
def test():
    """Return a fixed string; usable as a liveness check."""
    return "Docker hello"


@app.route('/KBUploader')
def KBUpload():
    """Render the knowledge-base upload page."""
    return render_template("KBTrain.html")


@app.route('/aiassist')
def aiassist():
    """Render the chat page."""
    return render_template("index.html")


@app.route('/agent/chat/suggestion', methods=['POST'])
def process_json():
    """Answer each query in the JSON body using the RAG chain.

    Reads a JSON object with a ``message`` list of queries and an optional
    ``custDetails`` object (``cName``, ``cDistrict``).

    Returns:
        ``{"suggestions": [{"message": ..., "id": ...}, ...]}`` on success,
        the plain text ``Content-Type not supported!`` for non-JSON requests,
        or a 400 JSON error when the body is not a JSON object.
    """
    print("RQST")
    print(f"\n{'*' * 100}\n")
    # NOTE(review): timestamp expression was garbled in the original;
    # reconstructed as datetime.now().strftime(...) - confirm.
    print("Request Received >>>>>>>>>>>>>>>>>>", datetime.now().strftime("%H:%M:%S"))

    # is_json also accepts "application/json; charset=utf-8", which an exact
    # comparison against the raw header rejects.
    if not request.is_json:
        return 'Content-Type not supported!'

    requestQuery = request.get_json()
    print(type(requestQuery))
    if not isinstance(requestQuery, dict):
        return jsonify(error="JSON body must be an object"), 400

    custDetails = requestQuery.get("custDetails")
    custDetailsPresent = "custDetails" in requestQuery
    customerName = ""
    customerDistrict = ""
    if isinstance(custDetails, dict):
        customerName = custDetails.get('cName') or ""
        customerDistrict = custDetails.get('cDistrict') or ""

    print("chain initiation")
    chainRAG = getRAGChain(customerName, customerDistrict, custDetailsPresent, vectordb)
    print("chain created")

    suggestionArray = []
    for index, query in enumerate(requestQuery.get('message') or []):
        # Debug output: the retrieved chunks and their similarity scores.
        for doc, score in vectordb.similarity_search_with_score(query):
            print(f"\n{'-' * 100}\n")
            print(f"Document Source>>>>>> {doc.metadata.get('source', 'unknown')}\n\n")
            print("Page Content>>>>>> " + doc.page_content + "\n\n")
            print("Similarity Score>>>> " + str(score))
            print(f"\n{'-' * 100}\n")

        # NOTE(review): the call was garbled in the original; reconstructed
        # as chainRAG.run(...), which returns the answer text - confirm.
        message = chainRAG.run({"query": query})
        print("query:", query)
        print("Response:", message)
        if "I don't know" in message:
            message = "Dear Sir/ Ma'am, Could you please ask questions relevant to Jio?"
        suggestionArray.append({"message": message, "id": index})
    return jsonify(suggestions=suggestionArray)


@app.route('/file_upload', methods=['POST'])
def file_Upload():
    """Rebuild the knowledge base from uploaded files and/or URLs.

    Reads the ``files[]`` upload field and the ``weburl`` form field, loads
    the documents, and replaces the module-level ``vectordb`` that
    ``process_json`` queries. Renders ``index.html`` afterwards.
    """
    # Without this declaration the assignment below creates a local and the
    # knowledge base used for answering is never updated.
    global vectordb

    # Tolerate a form in which either field is missing altogether.
    fileprovided = any(f.filename for f in request.files.getlist('files[]'))
    urlProvided = bool(request.form.get('weburl', '').strip())
    print("*******")
    print("File Provided:" + str(fileprovided))
    print("URL Provided:" + str(urlProvided))
    print("*******")
    print(uploads_dir)

    documents = loadKB(fileprovided, urlProvided, uploads_dir, request)
    if documents:
        vectordb = createVectorDB(documents)
    else:
        # Keep the current knowledge base rather than indexing nothing.
        print("No documents loaded; keeping existing knowledge base")
    return render_template("index.html")


if __name__ == '__main__':
    # NOTE(review): the host argument was garbled in the original (only ''
    # remained); '0.0.0.0' is assumed - confirm.
    app.run(host='0.0.0.0', port=int(os.environ.get('PORT', 7860)))