SearchDemo / app.py
demoPOC's picture
Update app.py
7107eed
raw
history blame
5.12 kB
import openai
import numpy as np
import pandas as pd
import os
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain import HuggingFaceHub
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA
from langchain.chains import VectorDBQA
from langchain.document_loaders import TextLoader, WebBaseLoader, SeleniumURLLoader
from langchain.document_loaders import UnstructuredFileLoader
from flask import Flask, jsonify, render_template, request
from werkzeug.utils import secure_filename
from werkzeug.datastructures import FileStorage
import os
import warnings

import flask
import nltk
from dotenv import load_dotenv

# Fetch the NLTK "punkt" data at startup (presumably required by the
# unstructured document loaders used below -- TODO confirm).
nltk.download("punkt")
warnings.filterwarnings("ignore")

# Load variables from a local .env file BEFORE reading OPENAI_API_KEY.
# Previously the key was read first, so a key defined only in .env was
# never assigned to openai.api_key.
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")
# Build the default question-answering chain at import time from the bundled
# 'Jio.txt' file. (The former module-level `global documents` statement was
# removed: `global` has no effect outside a function body.)
loader = UnstructuredFileLoader('Jio.txt', mode='elements')
documents = loader.load()

# Split the loaded documents into chunks of 1500 characters with 150 overlap.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150)
texts = text_splitter.split_documents(documents)

# Embed the chunks and index them in a Chroma vector store.
embeddings = OpenAIEmbeddings()
vectordb = Chroma.from_documents(texts, embeddings)

# Retrieval QA chain over the vector store; source documents are returned
# alongside the answer so they can be logged by the request handler.
chain = RetrievalQA.from_chain_type(
    llm=OpenAI(temperature=0.0),
    chain_type="stuff",
    retriever=vectordb.as_retriever(search_type="mmr"),
    return_source_documents=True,
)
# Flask application object; template_folder is set to "./".
app = Flask(__name__, template_folder="./")

# Uploaded files are saved under <app root>/static/uploads; make sure the
# directory exists before any request tries to write to it.
uploads_dir = os.path.join(app.root_path, 'static', 'uploads')
os.makedirs(uploads_dir, exist_ok=True)
@app.route('/Home')
def index():
    """Serve the 'index.html' template for the /Home route."""
    page = render_template('index.html')
    return page
@app.route('/post_json', methods=['POST'])
def process_json():
    """Answer a user query posted as JSON using the current QA chain.

    Expects a JSON body of the form ``{"query": "<question>"}``.

    Returns:
        A JSON response ``{"botMessage": <answer>}`` on success; the plain
        string 'Content-Type not supported!' when the request is not JSON;
        or a JSON ``{"error": ...}`` with HTTP 400 when the body is not a
        JSON object carrying a non-empty string ``query``.
    """
    # request.is_json inspects the parsed mimetype, so headers such as
    # "application/json; charset=utf-8" are accepted (an exact string
    # comparison on the raw header rejected them).
    if not request.is_json:
        return 'Content-Type not supported!'

    # silent=True yields None for an unparsable body instead of raising.
    payload = request.get_json(silent=True)
    userQuery = payload.get('query') if isinstance(payload, dict) else None
    if not isinstance(userQuery, str) or not userQuery.strip():
        return jsonify(error="Request body must be a JSON object with a non-empty 'query' string."), 400

    responseJSON = chain({"query": userQuery})

    # Log the retrieved source documents and the question/answer pair.
    pretty_print_docs(responseJSON['source_documents'])
    print("Ques:>>>>"+userQuery+"\n Ans:>>>"+responseJSON["result"])
    return jsonify(botMessage=responseJSON["result"])
@app.route('/file_upload', methods=['POST'])
def file_Upload():
    """Rebuild the global QA chain from uploaded files and/or web URLs.

    Reads uploaded files from the ``files[]`` form field and URLs from the
    ``weburl`` form field. When at least one file is provided, the upload
    directory is emptied first and the new files are saved and loaded. Any
    provided URLs are loaded as well. The combined documents are split,
    embedded and indexed, and the module-level ``chain`` is replaced.

    If nothing could be loaded, the existing chain is left untouched.

    Returns:
        The rendered 'index.html' template.
    """
    # Local import: shutil.rmtree is needed below but the module never
    # imported shutil, so directory cleanup used to fail with a NameError
    # that was swallowed by the broad except.
    import shutil

    global chain

    # Ignore empty file slots and blank URL fields; this also avoids the
    # IndexError that indexing [0] raised when a field was absent.
    uploaded_files = [f for f in request.files.getlist('files[]') if f.filename]
    urlList = [u for u in request.form.getlist('weburl') if u and u.strip()]
    fileprovided = bool(uploaded_files)
    urlProvided = bool(urlList)

    print("*******")
    print("File Provided:"+str(fileprovided))
    print("URL Provided:"+str(urlProvided))
    print("*******")

    # Always start from an empty list so a URL-only upload no longer hits
    # an unbound local variable.
    documents = []

    if fileprovided:
        # Remove everything left over from a previous upload.
        for filename in os.listdir(uploads_dir):
            file_path = os.path.join(uploads_dir, filename)
            print("Clearing Doc Directory. Trying to delete"+file_path)
            try:
                if os.path.isfile(file_path) or os.path.islink(file_path):
                    os.unlink(file_path)
                elif os.path.isdir(file_path):
                    shutil.rmtree(file_path)
            except OSError as e:
                # Best effort: report the failure and keep going.
                print('Failed to delete %s. Reason: %s' % (file_path, e))

        # Save each new file and load it into documents.
        for file in uploaded_files:
            print(file.filename)
            safe_name = secure_filename(file.filename)
            if not safe_name:
                # secure_filename can reduce a name to an empty string;
                # saving to the bare directory path would fail.
                print("Skipping file with unusable name: %r" % (file.filename,))
                continue
            saved_path = os.path.join(uploads_dir, safe_name)
            file.save(saved_path)
            loader = UnstructuredFileLoader(saved_path, mode='elements')
            documents.extend(loader.load())

    if urlProvided:
        print(urlList)
        urlLoader = SeleniumURLLoader(urlList)
        documents.extend(urlLoader.load())

    print(uploads_dir)

    if not documents:
        # Nothing to index: keep serving answers from the existing chain.
        print("No documents were loaded; keeping the existing chain.")
        return render_template("index.html")

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150)
    texts = text_splitter.split_documents(documents)
    embeddings = OpenAIEmbeddings()
    vectordb = Chroma.from_documents(texts, embeddings)
    chain = RetrievalQA.from_chain_type(
        llm=OpenAI(temperature=0.0),
        chain_type="stuff",
        retriever=vectordb.as_retriever(search_type="mmr"),
        return_source_documents=True,
    )
    return render_template("index.html")
@app.route('/')
def KBUpload():
    """Serve the 'KBTrain.html' template for the root route."""
    page = flask.render_template("KBTrain.html")
    return page
@app.route('/aiassist')
def aiassist():
    """Serve the 'index.html' template for the /aiassist route."""
    page = flask.render_template("index.html")
    return page
def pretty_print_docs(docs):
    """Print each document's source and content, separated by dashed rules.

    Args:
        docs: Iterable of objects exposing ``metadata`` (a mapping) and
            ``page_content``.

    A document without a ``source`` metadata entry is printed with the
    placeholder ``unknown`` instead of raising, so this logging helper
    cannot fail the request that calls it.
    """
    separator = f"\n{'-' * 100}\n"
    entries = []
    for position, doc in enumerate(docs, start=1):
        metadata = getattr(doc, "metadata", None) or {}
        source = metadata.get("source", "unknown")
        # f-strings tolerate non-string source/content values, unlike "+".
        entries.append(
            f"Document {position}:\n\n"
            f"Document Source>>> {source}\n\n"
            f"Content>>> {doc.page_content}"
        )
    print(separator.join(entries))
if __name__ == '__main__':
    # Listen on all interfaces; the port comes from the PORT environment
    # variable and falls back to 7860 when it is not set.
    listen_port = int(os.environ.get('PORT', 7860))
    app.run(host='0.0.0.0', port=listen_port)