import os
import shutil  # used by file_Upload to clear the uploads directory
import warnings

import openai
import numpy as np
import pandas as pd
import nltk
import flask
from flask import Flask, jsonify, render_template, request
from werkzeug.utils import secure_filename
from werkzeug.datastructures import FileStorage
from dotenv import load_dotenv

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain import HuggingFaceHub
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA
from langchain.chains import VectorDBQA
from langchain.document_loaders import TextLoader, WebBaseLoader, SeleniumURLLoader
from langchain.document_loaders import UnstructuredFileLoader

# NLTK sentence tokenizer used by the unstructured document loaders.
nltk.download("punkt")
warnings.filterwarnings("ignore")

# Load environment variables from .env before reading the OpenAI API key.
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")
# Bootstrap the knowledge base from the bundled default document.
loader = UnstructuredFileLoader('Jio.txt', mode='elements')
documents = loader.load()

# Chunk, embed, and index the documents in a Chroma vector store.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150)
texts = text_splitter.split_documents(documents)
embeddings = OpenAIEmbeddings()
vectordb = Chroma.from_documents(texts, embeddings)

# Retrieval-augmented QA chain: MMR retrieval over the vector store, answered by an OpenAI LLM.
chain = RetrievalQA.from_chain_type(llm=OpenAI(temperature=0.0), chain_type="stuff",
                                    retriever=vectordb.as_retriever(search_type="mmr"),
                                    return_source_documents=True)

app = flask.Flask(__name__, template_folder="./")

# Create a directory in a known location to save uploaded files to.
uploads_dir = os.path.join(app.root_path, 'static', 'uploads')
os.makedirs(uploads_dir, exist_ok=True)
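
# Illustrative sketch (not part of the original app, query text is made up): the chain can also be
# invoked directly; it returns a dict with the generated "result" and the retrieved
# "source_documents", which is exactly how the /post_json route below consumes it.
#
#   out = chain({"query": "What services does Jio offer?"})
#   print(out["result"])
#   for doc in out["source_documents"]:
#       print(doc.metadata["source"], doc.page_content[:80])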

@app.route('/Home')
def index():
    return flask.render_template('index.html')

# Chat endpoint: expects JSON {"query": "..."} and answers from the indexed knowledge base.
@app.route('/post_json', methods=['POST'])
def process_json():
    content_type = request.headers.get('Content-Type')
    if content_type == 'application/json':
        userQuery = request.get_json()['query']
        responseJSON = chain({"query": userQuery})
        print("Retrieved Document List START ***********************\n\n")
        pretty_print_docs(responseJSON['source_documents'])
        print("Retrieved Document List END ***********************\n\n")
        print("Ques:>>>>" + userQuery + "\n Ans:>>>" + responseJSON["result"])
        return jsonify(botMessage=responseJSON["result"])
    else:
        return 'Content-Type not supported!'
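
# Illustrative request against this endpoint (assuming the app is served on port 7860,
# as configured in the __main__ block below):
#
#   curl -X POST http://localhost:7860/post_json \
#        -H "Content-Type: application/json" \
#        -d '{"query": "Summarise the uploaded document"}'
#
# The response is JSON of the form {"botMessage": "..."}.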

# Knowledge-base update endpoint: accepts uploaded files and/or semicolon-separated web URLs,
# re-embeds their contents, and rebuilds the retrieval chain.
@app.route('/file_upload', methods=['POST'])
def file_Upload():
    global chain
    fileprovided = not request.files.getlist('files[]')[0].filename == ''
    urlProvided = not request.form.getlist('weburl')[0] == ''
    print("*******")
    print("File Provided:" + str(fileprovided))
    print("URL Provided:" + str(urlProvided))
    print("*******")
    print(not ('documents' in vars() or 'documents' in globals()))
    # if not ('documents' in vars() or 'documents' in globals()):
    documents = []
    if fileprovided:
        # Clear any previously uploaded files.
        for filename in os.listdir(uploads_dir):
            file_path = os.path.join(uploads_dir, filename)
            print("Clearing Doc Directory. Trying to delete " + file_path)
            try:
                if os.path.isfile(file_path) or os.path.islink(file_path):
                    os.unlink(file_path)
                elif os.path.isdir(file_path):
                    shutil.rmtree(file_path)
            except Exception as e:
                print('Failed to delete %s. Reason: %s' % (file_path, e))
        # Save and embed the newly provided files.
        for file in request.files.getlist('files[]'):
            print(file.filename)
            file.save(os.path.join(uploads_dir, secure_filename(file.filename)))
            loader = UnstructuredFileLoader(os.path.join(uploads_dir, secure_filename(file.filename)), mode='elements')
            documents.extend(loader.load())
    else:
        # Fall back to the bundled default document.
        loader = UnstructuredFileLoader('Jio.txt', mode='elements')
        documents.extend(loader.load())
    if urlProvided:
        weburl = request.form.getlist('weburl')
        print(weburl)
        urlList = weburl[0].split(';')
        print(urlList)
        urlLoader = SeleniumURLLoader(urlList)
        documents.extend(urlLoader.load())
    print(uploads_dir)

    # Rebuild the vector store and the retrieval chain over the new documents.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150)
    texts = text_splitter.split_documents(documents)
    print("All chunk List START ***********************\n\n")
    pretty_print_docs(texts)
    print("All chunk List END ***********************\n\n")
    embeddings = OpenAIEmbeddings()
    vectordb = Chroma.from_documents(texts, embeddings)
    chain = RetrievalQA.from_chain_type(llm=OpenAI(temperature=0.0), chain_type="stuff",
                                        retriever=vectordb.as_retriever(search_type="mmr"),
                                        return_source_documents=True)
    return render_template("index.html")
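
# Illustrative request against this endpoint (the field names 'files[]' and 'weburl' match the
# form fields read above; host/port assume the __main__ block below; the file name is made up):
#
#   curl -X POST http://localhost:7860/file_upload \
#        -F "files[]=@mydoc.pdf" \
#        -F "weburl=https://example.com;https://example.org"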

@app.route('/')
def KBUpload():
    return render_template("KBTrain.html")


@app.route('/aiassist')
def aiassist():
    return render_template("index.html")

# Debug helper: print each document's source and content, separated by a divider line.
def pretty_print_docs(docs):
    print(f"\n{'-' * 100}\n".join(
        [f"Document {i+1}:\n\n" + "Document Source>>> " + d.metadata['source'] +
         "\n\nContent>>> " + d.page_content for i, d in enumerate(docs)]))

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=int(os.environ.get('PORT', 7860)))