# Spaces:
# Runtime error
# Runtime error
import openai | |
import numpy as np | |
import pandas as pd | |
import os | |
from langchain.embeddings.openai import OpenAIEmbeddings | |
from langchain.embeddings.huggingface import HuggingFaceEmbeddings | |
from langchain import HuggingFaceHub | |
from langchain.vectorstores import Chroma | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from langchain.llms import OpenAI | |
from langchain.chains import RetrievalQA | |
from langchain.chains import VectorDBQA | |
from langchain.document_loaders import TextLoader, WebBaseLoader, SeleniumURLLoader | |
from langchain.document_loaders import UnstructuredFileLoader | |
from flask import Flask, jsonify, render_template, request | |
from werkzeug.utils import secure_filename | |
from werkzeug.datastructures import FileStorage | |
import os
import warnings

import flask
import nltk
from dotenv import load_dotenv

# Silence library warnings for the whole process.
warnings.filterwarnings("ignore")

# Fetch the NLTK "punkt" tokenizer data at import time.
nltk.download("punkt")

# Load variables from a local .env file *before* reading OPENAI_API_KEY;
# previously the key was read first, so a key defined only in .env was
# never assigned to ``openai.api_key``.
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")
# ---------------------------------------------------------------------------
# Default knowledge base, built once at import time from 'Jio.txt'.
#
# The module-level names below (``documents``, ``texts``, ``embeddings``,
# ``vectordb``, ``chain``) are ordinary globals. ``file_Upload`` rebinds only
# ``chain``; it builds its own local document list, so no ``global``
# statement is needed (or meaningful) at module scope.
# ---------------------------------------------------------------------------
loader = UnstructuredFileLoader('Jio.txt', mode='elements')
documents = loader.load()

# Split the loaded elements into chunks (chunk_size=1500, chunk_overlap=150).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150)
texts = text_splitter.split_documents(documents)

# Embed the chunks with OpenAI embeddings and index them in a Chroma store.
embeddings = OpenAIEmbeddings()
vectordb = Chroma.from_documents(texts, embeddings)

# Question-answering chain used by the request handlers; it also returns the
# source documents so they can be logged.
chain = RetrievalQA.from_chain_type(
    llm=OpenAI(temperature=0.0),
    chain_type="stuff",
    retriever=vectordb.as_retriever(search_type="mmr"),
    return_source_documents=True,
)

# Flask application; the template folder is configured as "./".
app = flask.Flask(__name__, template_folder="./")

# Create a directory in a known location to save files to.
uploads_dir = os.path.join(app.root_path, 'static', 'uploads')
os.makedirs(uploads_dir, exist_ok=True)
def index():
    """Render and return the ``index.html`` template.

    NOTE(review): no route decorator is visible on this function in this
    view of the file -- confirm it is registered with ``app``.
    """
    page = flask.render_template('index.html')
    return page
def process_json():
    """Answer a user question posted as JSON.

    Expects a request whose media type is ``application/json`` and whose body
    is an object with a non-empty string under ``"query"``. The query is run
    through the module-level ``chain``; the retrieved source documents and
    the question/answer pair are printed for diagnostics.

    Returns:
        * ``{"botMessage": <answer>}`` as JSON on success.
        * ``'Content-Type not supported!'`` with status 415 when the request
          is not JSON.
        * ``{"error": ...}`` with status 400 when the body is not valid JSON
          or has no usable ``"query"``.

    NOTE(review): no route decorator is visible on this function in this
    view of the file -- confirm it is registered with ``app``.
    """
    # ``mimetype`` drops parameters, so "application/json; charset=utf-8"
    # is accepted too (a plain Content-Type string comparison rejected it).
    if request.mimetype != 'application/json':
        return 'Content-Type not supported!', 415

    # silent=True yields None instead of raising on a malformed body.
    payload = request.get_json(silent=True)
    userQuery = payload.get('query') if isinstance(payload, dict) else None
    if not isinstance(userQuery, str) or not userQuery.strip():
        return jsonify(error="Request body must be a JSON object with a non-empty 'query' string."), 400

    responseJSON = chain({"query": userQuery})
    pretty_print_docs(responseJSON['source_documents'])
    print("Ques:>>>>"+userQuery+"\n Ans:>>>"+responseJSON["result"])
    return jsonify(botMessage=responseJSON["result"])
def file_Upload():
    """Rebuild the question-answering chain from uploaded files and/or URLs.

    Reads uploaded files from the ``files[]`` form field and web addresses
    from the ``weburl`` form field. When at least one file is provided, the
    uploads directory is emptied first and the new files are saved there and
    loaded. Any provided URLs are loaded as well. The combined documents are
    split, embedded and indexed, and the module-level ``chain`` is replaced
    with a chain over the new index.

    If nothing is submitted, or nothing could be extracted, the existing
    ``chain`` is left untouched.

    Returns:
        The rendered ``index.html`` template.

    NOTE(review): no route decorator is visible on this function in this
    view of the file -- confirm it is registered with ``app``.
    """
    # Imported here because the file's import section does not import shutil;
    # without it the directory-removal branch below raised NameError, which
    # the broad exception handler then swallowed.
    import shutil

    global chain

    # Ignore empty slots: browsers submit a blank filename / blank string
    # when the corresponding input is left empty, and the field may be
    # missing altogether (getlist() then returns an empty list).
    uploaded_files = [f for f in request.files.getlist('files[]') if f.filename]
    urlList = [url.strip() for url in request.form.getlist('weburl') if url.strip()]
    fileprovided = bool(uploaded_files)
    urlProvided = bool(urlList)

    print("*******")
    print("File Provided:"+str(fileprovided))
    print("URL Provided:"+str(urlProvided))
    print("*******")

    if not (fileprovided or urlProvided):
        # Nothing to index; keep serving answers from the current chain.
        return render_template("index.html")

    # Always start from an empty list so that a URL-only submission works
    # (the list used to be created only inside the file branch).
    documents = []

    if fileprovided:
        # Delete previously uploaded files so only the new set is kept.
        for filename in os.listdir(uploads_dir):
            file_path = os.path.join(uploads_dir, filename)
            print("Clearing Doc Directory. Trying to delete"+file_path)
            try:
                if os.path.isfile(file_path) or os.path.islink(file_path):
                    os.unlink(file_path)
                elif os.path.isdir(file_path):
                    shutil.rmtree(file_path)
            except OSError as e:
                # Best effort: report and carry on with the remaining entries.
                print('Failed to delete %s. Reason: %s' % (file_path, e))

        # Save and load each newly provided file.
        for file in uploaded_files:
            print(file.filename)
            safe_name = secure_filename(file.filename)
            if not safe_name:
                # secure_filename() may return an empty string; joining that
                # with uploads_dir would point at the directory itself.
                print("Skipping file with unusable name: " + repr(file.filename))
                continue
            saved_path = os.path.join(uploads_dir, safe_name)
            file.save(saved_path)
            loader = UnstructuredFileLoader(saved_path, mode='elements')
            documents.extend(loader.load())

    if urlProvided:
        print(urlList)
        urlLoader = SeleniumURLLoader(urlList)
        documents.extend(urlLoader.load())

    print(uploads_dir)

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150)
    texts = text_splitter.split_documents(documents)
    if not texts:
        # No content was extracted; do not replace a working chain with one
        # built over an empty index.
        return render_template("index.html")

    embeddings = OpenAIEmbeddings()
    vectordb = Chroma.from_documents(texts, embeddings)
    chain = RetrievalQA.from_chain_type(
        llm=OpenAI(temperature=0.0),
        chain_type="stuff",
        retriever=vectordb.as_retriever(search_type="mmr"),
        return_source_documents=True,
    )
    return render_template("index.html")
def KBUpload():
    """Render and return the ``KBTrain.html`` template.

    NOTE(review): no route decorator is visible on this function in this
    view of the file -- confirm it is registered with ``app``.
    """
    page = render_template("KBTrain.html")
    return page
def aiassist():
    """Render and return the ``index.html`` template.

    NOTE(review): no route decorator is visible on this function in this
    view of the file -- confirm it is registered with ``app``.
    """
    page = render_template("index.html")
    return page
def pretty_print_docs(docs):
    """Print each document's source and content to standard output.

    Documents are numbered from 1 and separated by a line of 100 dashes.

    Args:
        docs: Iterable of objects exposing ``metadata`` (a mapping, possibly
            empty or ``None``) and ``page_content``.

    A document without a ``"source"`` entry is shown with the source
    ``<unknown>`` instead of raising ``KeyError``; non-string sources and
    contents are formatted with ``str()`` instead of raising ``TypeError``.
    """
    separator = f"\n{'-' * 100}\n"
    sections = []
    for number, doc in enumerate(docs, start=1):
        metadata = doc.metadata or {}
        source = metadata.get("source", "<unknown>")
        sections.append(
            f"Document {number}:\n\n"
            f"Document Source>>> {source}\n\n"
            f"Content>>> {doc.page_content}"
        )
    print(separator.join(sections))
if __name__ == '__main__':
    # Listen on all interfaces; the port is taken from the PORT environment
    # variable and falls back to 7860 when it is not set.
    app.run(
        host='0.0.0.0',
        port=int(os.environ.get('PORT', 7860)),
    )