Redmind's picture
Update app.py
a9c1aca verified
raw
history blame
61.7 kB
from PIL import Image
import base64
from io import BytesIO
import os
import re
import tempfile
import wave
import requests
import gradio as gr
import time
import shutil
import json
import nltk
import mysql.connector
import fnmatch
# audio related code is not included based on Arun's input
# audio package
import speech_recognition as sr
from pydub import AudioSegment
from pydub.playback import play
# SMTP code is not included since HFSpaces doesn't support it
# email library
import smtplib, ssl
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.base import MIMEBase
from email import encoders
# langchain
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableSequence, RunnableLambda
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.utilities import SQLDatabase
from langchain.agents import create_tool_calling_agent, AgentExecutor, Tool
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.tools import StructuredTool
#from langchain.pydantic_v1 import BaseModel, Field
from pydantic import BaseModel, Field
from PyPDF2 import PdfReader
from nltk.tokenize import sent_tokenize
from datetime import datetime
from sqlalchemy import create_engine
from sqlalchemy.sql import text
import openai
# pandas
import pandas as pd
from pandasai.llm.openai import OpenAI
from pandasai import SmartDataframe
from dotenv import load_dotenv
# Load environment variables
load_dotenv()
# langfuse analytics
from langfuse.callback import CallbackHandler
# Inventory API data table
from tabulate import tabulate
#forcefully stop the agent execution
import concurrent.futures
import threading
# mailjet_rest to send email
from mailjet_rest import Client
import base64
#for PDF form filling
from PyPDFForm import FormWrapper
# ---------------------------------------------------------------------------
# Module-level state and configuration.
# Everything in this section runs at import time, in this order.
# ---------------------------------------------------------------------------
#Variables Initialization
# Placeholder; rebound to a real AgentExecutor once the tools and the prompt
# template have been defined further down in this file.
agent_executor = None
# Vector store for a document uploaded at runtime (assigned in upload_file).
vector_store1 = None
texts1 = None
excel_dataframe = None
# Extension of the most recently uploaded file (assigned in upload_file).
file_extension = None
total_rows = ""
# docstatus and sample_table are interpolated into the Excel prompt text.
docstatus = ""
sample_table = ""
#This is to define the summary of the runtime tool. This summary will be updated in prompt template and description of the new tool
run_time_tool_summary=""
# Define global variables for managing the thread and current_event
# Single-worker pool used by handle_query to run the agent off the UI thread.
executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
# Future returned by executor.submit for the query currently in flight.
current_event = None
# Set by stop_processing to ask handle_query to abandon the running query.
stop_event = threading.Event()
# LangFuse API keys and host settings
# NOTE(review): each assignment is a no-op when the variable is already set and
# raises TypeError when it is not (os.environ values must be str, getenv returns
# None for a missing variable) - confirm whether failing fast here is intended.
os.environ["LANGFUSE_PUBLIC_KEY"] = os.getenv("LANGFUSE_PUBLIC_KEY")
os.environ["LANGFUSE_SECRET_KEY"] = os.getenv("LANGFUSE_SECRET_KEY")
os.environ["LANGFUSE_HOST"] = os.getenv("LANGFUSE_HOST")
# MySQL settings consumed by connect_to_db (feedback storage).
# NOTE(review): credentials are hard-coded in source; they should be rotated and
# loaded from the environment like the other secrets in this file.
DB_USER = 'u852023448_redmindgpt'
DB_PASSWORD = 'redmindGpt@123'
DB_HOST = '217.21.88.10'
DB_NAME = 'u852023448_redmindgpt'
# Callback handler passed as a LangChain callback when the agent/chains are invoked.
langfuse_handler = CallbackHandler()
langfuse_handler.auth_check() # Optional: Checks if the authentication is successful
# Presumably required by sent_tokenize (used in summarize_document) - TODO confirm.
nltk.download('punkt')
# The OpenAI key is read from OPEN_AI_API and re-exported under the name the
# OpenAI clients look up. NOTE(review): raises TypeError if OPEN_AI_API is unset.
open_api_key_token = os.getenv("OPEN_AI_API")
os.environ['OPENAI_API_KEY'] = open_api_key_token
# PDF that is split and indexed at import time as the default knowledge base.
pdf_path = "Inbound.pdf"
db_uri = os.getenv("POSTGRESQL_CONNECTION")
# Database setup
db = SQLDatabase.from_uri(db_uri)
user_email = ""
warehouse_name = ""
warehouse_id = ""
# Today's date to be populated in inventory API
# NOTE(review): evaluated once at import, so a long-running process keeps
# sending the start-up date.
inventory_date = datetime.today().strftime('%Y-%m-%d')
# Endpoint descriptions used by inventory_report. The "query" and "warehouseId"
# values are the empty strings assigned above at the time this list is built;
# inventory_report later overwrites "warehouseId" with the resolved id.
apis = [
# fetch warehouse ID
{
"url": "http://193.203.162.39:8383/nxt-wms/userWarehouse/fetchWarehouseForUserId?",
"params": {"query": warehouse_name, "userId": 164}
},
# Stock summary based on warehouse id
{
"url": "http://193.203.162.39:8383/nxt-wms/transactionHistory/stockSummary?",
"params": {"branchId": 343, "onDate": inventory_date, "warehouseId": warehouse_id}
}
]
# LLM setup
# Chat model used by the agent and by SQL generation.
llm = ChatOpenAI(model="gpt-4o-mini", max_tokens=300, temperature=0.1)
# pandasai LLM wrapper used by chat_with_llm for chart generation.
llm_chart = OpenAI(is_safe=False)
def get_schema(_):
    """Return the table description string of the configured SQL database.

    The positional argument is ignored; it only exists so the function can be
    used where a one-argument callable is expected.
    """
    return db.get_table_info()
def generate_sql_query(question):
    """Ask the LLM to translate a natural-language question into one SQL statement.

    The database schema (from ``get_schema``) and the question are injected into
    a prompt, the prompt is sent to ``llm`` and the raw text answer is returned.

    Args:
        question: The user's question in natural language.

    Returns:
        The model output with surrounding whitespace stripped. It may still
        contain markdown code fences; ``run_query`` removes those.
    """
    schema = get_schema(None)
    template_query_generation = """
Schema: {schema}
Question: {question}
Provide a SQL query to answer the above question using the exact field names and table names specified in the schema.
SQL Query (Please provide only the SQL statement without explanations or formatting):
"""
    prompt_query_generation = ChatPromptTemplate.from_template(template_query_generation)
    # The chain input is ignored; schema and question are captured by the lambda.
    schema_and_question = RunnableLambda(lambda _: {'schema': schema, 'question': question})
    sql_chain = RunnableSequence(
        schema_and_question,
        prompt_query_generation,
        llm.bind(stop=["SQL Query End"]),  # Adjust the stop sequence to your need
        StrOutputParser()
    )
    # Invoke the chain exactly once. The original invoked it twice and threw
    # the first answer away, paying for two LLM calls per question.
    sql_query = sql_chain.invoke({}, config={"callbacks": [langfuse_handler]})
    return sql_query.strip()
def run_query(query):
    """Execute an LLM-produced SQL statement and return the database result.

    Markdown code fences are removed and the text is trimmed before execution.
    Any exception raised while running the statement is printed and ``None``
    is returned instead.
    """
    sql_text = query
    # Drop the opening fence first so the generic fence does not leave "sql" behind.
    for fence in ("```sql", "```"):
        sql_text = sql_text.replace(fence, "")
    sql_text = sql_text.strip()
    print(f"Executing SQL Query: {sql_text}")
    try:
        return db.run(sql_text)
    except Exception as exc:
        print(f"Error executing query: {exc}")
    return None
# Database query tool: generate a SQL statement for the question with the LLM,
# then execute it against the configured database.
def database_tool(question):
    """Answer a question from the database: generate SQL for it, then run it.

    Returns whatever ``run_query`` returns (``None`` when execution failed).
    """
    generated_sql = generate_sql_query(question)
    query_result = run_query(generated_sql)
    return query_result
def get_ASN_data(question):
    """Fetch one ASN from the WMS API and format its details for the agent.

    Args:
        question: The ASN transaction id (the tool description asks the agent
            to pass only the id, not the whole user question).

    Returns:
        A sentence containing the ASN fields as a list of ``[label, value]``
        pairs, the "not found" message when the API returns no content, or a
        failure message when the request could not be completed. The function
        always returns a string so the agent never receives ``None``.
    """
    not_found_message = "ASN Details are not found. Please contact system administrator."
    failure_message = "Unable to fetch the ASN details at the moment. Please try again later."

    base_url = os.getenv("ASN_API_URL")
    print(f"base_url{base_url}")
    if not base_url:
        # Without the base URL the request below would target "None...".
        print("ASN_API_URL is not configured")
        return failure_message

    complete_url = f"{base_url}branchMaster.id=343&transactionUid={question}&userId=164&transactionType=ASN"
    try:
        # A timeout keeps a hung upstream service from blocking the worker forever.
        response = requests.get(complete_url, timeout=30)
        print(f"complete_url{complete_url}")
        print(f"response{response}")
        # Check the status before parsing: an error page is usually not JSON and
        # would otherwise surface as a misleading decode error.
        response.raise_for_status()
        payload = response.json()

        if not ('result' in payload and 'content' in payload['result'] and payload['result']['content']):
            return not_found_message

        content = payload['result']['content'][0]
        trnHeaderAsn = content['trnHeaderAsn']
        party = content['party'][0]
        shipper = party['shipper']

        details = [
            ["Transaction UID", trnHeaderAsn['transactionUid']],
            ["Customer Order No", trnHeaderAsn.get('customerOrderNo', 'N/A')],
            ["Order Date", trnHeaderAsn.get('orderDate', 'N/A')],
            ["Customer Invoice No", trnHeaderAsn.get('customerInvoiceNo', 'N/A')],
            ["Invoice Date", trnHeaderAsn.get('invoiceDate', 'N/A')],
            ["Expected Receiving Date", trnHeaderAsn['expectedReceivingDate']],
            ["Transaction Status", trnHeaderAsn['transactionStatus']],
            ["Shipper Code", shipper['code'] if shipper else 'N/A'],
            ["Shipper Name", shipper['name'] if shipper else 'N/A'],
        ]
        return f"The ASN details of {question} is {details}."
    except requests.exceptions.HTTPError as http_err:
        print(f"HTTP error occurred: {http_err}")
        return failure_message
    except Exception as err:
        # Covers network failures, invalid JSON and unexpected payload shapes.
        print(f"An error occurred: {err}")
        return failure_message
def load_and_split_pdf(pdf_path):
    """Extract the text of every page of a PDF and split it into chunks.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The chunks produced by a ``RecursiveCharacterTextSplitter`` configured
        with ``chunk_size=1000`` and ``chunk_overlap=50``.
    """
    pdf = PdfReader(pdf_path)
    # Page texts are concatenated without any separator.
    full_text = ''.join(page.extract_text() for page in pdf.pages)
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
    return splitter.split_text(full_text)
def create_vector_store(texts):
    """Embed the given text chunks with OpenAI embeddings and index them in FAISS.

    Args:
        texts: The text chunks to index.

    Returns:
        The FAISS vector store built from ``texts``.
    """
    embedding_model = OpenAIEmbeddings()
    return FAISS.from_texts(texts, embedding_model)
def query_vector_store(vector_store, query, config=None):
    """Run a similarity search and return the five closest matches.

    Args:
        vector_store: Object exposing ``similarity_search(query, k=...)``.
        query: The search text.
        config: Optional value that is only printed when truthy; it is not
            forwarded to the search.

    Returns:
        The result of ``vector_store.similarity_search(query, k=5)``.
    """
    if config:
        print("Config passed:", config)
    matches = vector_store.similarity_search(query, k=5)
    print(f"Vector store return: {matches}")
    return matches
def summarize_document(docs):
    """Shorten each document to its first five sentences and join the results.

    Args:
        docs: Iterable whose items are either objects with a ``page_content``
            attribute or lists of such objects (a list is merged with spaces).

    Returns:
        The shortened documents separated by blank lines. A document with five
        sentences or fewer is kept unchanged.
    """
    summaries = []
    for entry in docs:
        if isinstance(entry, list):
            content = ' '.join([part.page_content for part in entry])
        else:
            content = entry.page_content
        sentences = sent_tokenize(content)
        # Only truncate when there is actually something to cut.
        summaries.append(' '.join(sentences[:5]) if len(sentences) > 5 else content)
    return '\n\n'.join(summaries)
# Build the default knowledge base once at import time: the PDF named by
# pdf_path is split into chunks, which are then embedded and indexed.
# document_data_tool searches this store.
texts = load_and_split_pdf(pdf_path)
vector_store = create_vector_store(texts)
def document_data_tool_runtime(question):
    """Search the runtime-uploaded document store (``vector_store1``) for ``question``.

    Returns the documents found by ``query_vector_store``.
    """
    print(f"Document data runtime tool enter: {question} with {vector_store1}")
    search_config = {"callbacks": [langfuse_handler]}
    return query_vector_store(vector_store1, question, config=search_config)
def document_data_tool(question):
    """Search the default document store (``vector_store``) for ``question``.

    Returns the documents found by ``query_vector_store`` without summarizing
    them.
    """
    print(f"Document data tool enter: {question}")
    search_config = {"callbacks": [langfuse_handler]}
    return query_vector_store(vector_store, question, config=search_config)
# mailjet API since SMTP is not supported HF spaces
def send_email_with_attachment_mailjet(recipient_email, subject, body, attach_img_base64=None):
    """Send a plain-text email through the Mailjet v3.1 Send API.

    Args:
        recipient_email: Address of the recipient.
        subject: Subject line.
        body: Plain-text body.
        attach_img_base64: Optional base64-encoded PNG. When given it is
            attached as ``inventory_report.png``; when omitted the message is
            sent without an attachment (the original always sent an attachment
            entry, even with ``None`` content).

    Returns:
        ``True`` when Mailjet answers with HTTP 200, ``False`` otherwise.
    """
    api_key = os.getenv("MAILJET_API_KEY")
    api_secret = os.getenv("MAILJET_API_SECRET")
    # Initialize the Mailjet client
    mailjet = Client(auth=(api_key, api_secret), version='v3.1')

    message = {
        # NOTE(review): the sender address looks like an obfuscation
        # placeholder - confirm the real address before relying on this.
        "From": {
            "Email": "[email protected]",
            "Name": "Redmind Technologies"
        },
        "To": [
            {
                "Email": recipient_email,
                "Name": ""
            }
        ],
        "Subject": subject,
        "TextPart": body,
        "CustomID": "AppGettingStartedTest",
    }
    if attach_img_base64:
        message["Attachments"] = [
            {
                "ContentType": "image/png",
                "Filename": "inventory_report.png",
                "Base64Content": attach_img_base64,
            }
        ]

    # Send the email
    result = mailjet.send.create(data={'Messages': [message]})
    if result.status_code == 200:
        print("Email sent successfully with attachment!")
        return True
    print(f"Failed to send email. Status code: {result.status_code}")
    print(result.json())
    return False
#smtp lib
def send_email_with_attachment(recipient_email, subject, body, attachment_path):
    """Send an email with one file attached via Gmail SMTP (STARTTLS, port 587).

    Sender address and password are read from ``EMAIL_SENDER`` and
    ``EMAIL_PASSWORD``. Any failure is printed and swallowed, so the function
    never raises and always returns ``None``.

    Args:
        recipient_email: Address of the recipient.
        subject: Subject line.
        body: Plain-text body.
        attachment_path: Path of the file to attach.
    """
    try:
        sender_email = os.getenv("EMAIL_SENDER")
        sender_password = os.getenv("EMAIL_PASSWORD")

        # Create a multipart message
        msg = MIMEMultipart()
        msg['From'] = sender_email
        msg['To'] = recipient_email
        msg['Subject'] = subject
        msg.attach(MIMEText(body, 'plain'))

        # Read the attachment inside a context manager so the handle is closed
        # even when reading fails (the original never closed it).
        part = MIMEBase('application', 'octet-stream')
        with open(attachment_path, "rb") as attachment:
            part.set_payload(attachment.read())
        encoders.encode_base64(part)
        part.add_header('Content-Disposition', f"attachment; filename= {attachment_path}")
        msg.attach(part)

        # The context manager closes the connection on both success and error.
        with smtplib.SMTP('smtp.gmail.com', 587) as server:
            server.starttls()
            server.login(sender_email, sender_password)
            server.sendmail(sender_email, recipient_email, msg.as_string())
    except Exception as error:
        print(f"An error occurred: {error}")
def make_api_request(url, params, timeout=30):
    """Make a GET request and return the parsed JSON body.

    Args:
        url: Endpoint to call.
        params: Query parameters for the request.
        timeout: Seconds to wait for the server before giving up. Added so a
            hung endpoint cannot block the caller indefinitely.

    Returns:
        The decoded JSON data, or ``None`` when the request failed, returned an
        error status, or the body could not be parsed (the error is printed).
    """
    try:
        response = requests.get(url, params=params, timeout=timeout)
        response.raise_for_status()  # Raises an HTTPError if the response was an error
        return response.json()
    except requests.exceptions.HTTPError as http_err:
        print(f"HTTP error occurred: {http_err}")
    except Exception as err:
        print(f"An error occurred: {err}")
    return None
def inventory_report(question):
    """Build the stock summary of one warehouse and hand it to the chart LLM.

    Args:
        question: Tool input in the form
            ``'warehouse name: user question: email (optional)'``.

    Returns:
        The value returned by ``chat_with_llm`` for the inventory DataFrame, or
        a user-facing message when the input has no ``:`` delimiter, the
        warehouse lookup fails, the warehouse is unknown, or no stock data is
        returned. The function always returns a value; the original returned
        ``None`` when the warehouse lookup request failed.
    """
    # Split the question to extract warehouse name, user question, and optional email
    if ":" not in question:
        return "warehouse name not found"
    parts = question.split(":", 2)
    warehouse_name = parts[0].strip()
    user_question = parts[1].strip()
    user_email = parts[2].strip() if len(parts) > 2 else None
    print(f"Warehouse: {warehouse_name}, Email: {user_email}, Question: {user_question}")

    data = make_api_request(apis[0]["url"], apis[0]["params"])
    print(data)
    if not data:
        return "Unable to fetch the warehouse list at the moment. Please try again later."

    # Resolve the numeric id of the warehouse whose 'wareHouseId' matches the name.
    warehouse_id = next(
        (item['id'] for item in (data.get('result') or []) if item.get('wareHouseId') == warehouse_name),
        None,
    )
    if not warehouse_id:
        return "Please provide a warehouse name available in the database."

    # Build per-request parameters instead of mutating the shared module-level
    # `apis` list, and compute the date now rather than reusing the value that
    # was frozen when the module was imported.
    stock_params = dict(
        apis[1]["params"],
        warehouseId=warehouse_id,
        onDate=datetime.today().strftime('%Y-%m-%d'),
    )
    data1 = make_api_request(apis[1]["url"], stock_params)
    if not data1:
        return "There are no inventory details for the warehouse you have given."

    headers = ["S.No", "Warehouse Code", "Warehouse Name", "Customer Code", "Customer Name", "Item Code", "Item Name",
               "Currency", "EAN", "UOM", "Quantity", "Gross Weight", "Volume", "Total Value"]
    table_data = [
        [
            index,  # Serial number
            item['warehouse']['code'],
            item['warehouse']['name'],
            item['customer']['code'],
            item['customer']['name'],
            item['skuMaster']['code'],
            item['skuMaster']['name'],
            item['currency']['code'],
            item['eanUpc'],
            item['uom']['code'],
            item['totalQty'],
            item['grossWeight'],
            item['volume'],
            item['totalValue'],
        ]
        for index, item in enumerate(data1['result'], start=1)
    ]
    # Convert to pandas DataFrame
    df = pd.DataFrame(table_data, columns=headers)
    # The complete tool input (not only the user question) is passed on, as before.
    return chat_with_llm(df, question)
def chat_with_llm(df,question):
    """Ask the chart LLM a question about a DataFrame.

    Wraps ``df`` in a ``SmartDataframe`` configured with ``llm_chart`` and
    returns the result of its ``chat`` call for ``question``.
    """
    smart_frame = SmartDataframe(df, config={"llm": llm_chart})
    return smart_frame.chat(question)
def bind_llm(llm, tools,prompt_template):
    """Create an ``AgentExecutor`` for a tool-calling agent.

    Args:
        llm: The chat model; ``llm.bind()`` is called on it without arguments.
        tools: The tools the agent may call.
        prompt_template: Template text turned into a ``ChatPromptTemplate``.

    Returns:
        A verbose ``AgentExecutor`` wrapping the agent and the same tools.
    """
    bound_llm = llm.bind()
    prompt = ChatPromptTemplate.from_template(prompt_template)
    tool_agent = create_tool_calling_agent(bound_llm, tools, prompt)
    return AgentExecutor(agent=tool_agent, tools=tools, verbose=True)
# Define input and output models using Pydantic
class QueryInput(BaseModel):
    """Input schema used by the tools: a single free-text ``question`` field."""

    # Earlier wording that was left disabled in the description:
    # Invoke datavisulaization tool by processing the user question and send two inputs to the tool. One input will be the warehouse name and another input to the tool will be the entire user_question itself. Please join those two strings and send them as a single input string with ':' as delimiter")
    question: str = Field(
        description=(
            "The question to be answered by appropriate tool. Please follow the instructions. "
            "For API tool, do not send the question as it is. Please send the ASN id."
        )
    )
    # config: dict = Field(default={}, description="Optional configuration for the database query.")
# Define the output model for database queries
class QueryOutput(BaseModel):
    """Output schema used by the tools: a single required ``result`` string."""

    result: str = Field(
        ...,
        description=(
            "Display the answer based on the prompts given in each tool. "
            "For dataVisualization tool, it sends a image file as output. "
            "Please give the image file path only to the gr.Image. "
            "For DocumentData tool, Please provide a complete and concise response within 200 words "
            "and Ensure that the response is not truncated and covers the essential points."
        ),
    )
# Wrap the function with StructuredTool for better parameter handling
# Tools offered to the agent. Every tool takes the single `question` string
# declared by QueryInput; the descriptions below are what the model reads when
# choosing a tool, so their wording is part of the program's behaviour.
tools = [
# ASN lookup through the WMS API (expects only the ASN id as input).
StructuredTool(
func=get_ASN_data,
name="APIData",
args_schema=QueryInput,
output_schema=QueryOutput,
description="Tool to get details of ASN api. ASN id will be in the input with the format of first three letters as ASN and it is followed by 11 digit numeral. Pass only the id as input. Do not send the complete user question to the tool. If there are any other queries related to ASN without ASN id, please use the document tool."
),
# Similarity search over the PDF indexed at import time.
StructuredTool(
func=document_data_tool,
name="DocumentData",
args_schema=QueryInput,
output_schema=QueryOutput,
description="You are an AI assistant trained to help with warehouse management questions based on a detailed document about our WMS. The document covers various processes such as ASN handling, purchase orders, cross docking, appointment scheduling for shipments, and yard management. Please provide a complete and concise response within 200 words and Ensure that the response is not truncated and covers the essential points. "
),
# LLM-generated SQL executed against the configured database.
StructuredTool(
func=database_tool,
name="DatabaseQuery",
args_schema=QueryInput,
output_schema=QueryOutput,
description="Tool to query the database based on structured input."
),
# Inventory chart for one warehouse.
# NOTE(review): example 1 in the description has no ':' delimiter in the tool
# input, and inventory_report answers "warehouse name not found" for such
# input - confirm that this is the intended outcome for that case.
StructuredTool(
func=inventory_report,
name="dataVisualization",
args_schema=QueryInput,
output_schema=QueryOutput,
description=""" Tool to generate a visual chart output for a particular warehouse based on the provided question.
This tool processes the user question to identify the warehouse name and the specific request. If the user specifies
an email, include the email in the input. The input format should be: 'warehouse name: user question: email (if any)'.
The tool generates the requested chart and sends it to the provided email if specified.
Examples:
1. Question without email, without warehouse: "Analyze item name and quantity in a bar chart in warehouse"
Input to tool: "I want to analyze item name and quantity in a bar chart"
2. Question with email: "Analyze item name and quantity in a bar chart in warehouse Allcargo Logistics and send email to [email protected]"
Input to tool: "Allcargo Logistics: I want to analyze item name and quantity in a bar chart: [email protected]"
"""
)
]
# Agent prompt. This is an f-string without interpolated values: the doubled
# braces are f-string escapes, so the resulting text contains the literal
# placeholders {agent_scratchpad} and {input}, which ChatPromptTemplate fills
# in when the agent runs.
prompt_template = f"""You are an assistant that helps with database queries, API information, and document retrieval. Your job is to provide clear, complete, and detailed responses to the following queries. Please give the output response in an user friendly way and remove "**" from the response. For example, document related queries can be answered in a clear and concise way with numbering and not as a paragraph. Database related queries should be answered with proper indentation and use numbering for the rows. ASN id related queries should be answered with proper indentation and use numbering for the rows.
For ASN id related questions, if the user specifies an ASN id, provide the information from the api tool. Pass only the id as input to the tool. Do not pass the entire question as input to the tool. If the details are not found, say it in a clear and concise way.
You are an AI assistant trained to help with warehouse management questions based on a detailed document about our WMS. The document covers various processes such as ASN handling, purchase orders, cross docking, appointment scheduling for shipments, and yard management. Please provide a complete and concise response within 200 words and Ensure that the response is not truncated and covers the essential points. When answering, focus on providing actionable insights and clear explanations related to the specific query. Please remove "**" from the response.
For SQL database-related questions, only use the fields available in the warehouse schema, including tables such as customer_master, efs_company_master, efs_group_company_master, efs_region_master, party_address_detail, wms_warehouse_master.
For datavisualization, user will ask for inventory report of a particular warehouse. Your job is to return the image path to chat interface and display the image as output.
{{agent_scratchpad}}
Here is the information you need to process:
Question: {{input}}"""
# Replace the None placeholder assigned at the top of the module with the
# executor built from the LLM, the tools and the prompt above.
agent_executor = bind_llm(llm,tools,prompt_template)
def ensure_temp_chart_dir():
    """Create the chart directory named by ``IMAGE_MAIN_URL`` if it is missing.

    Does nothing (apart from printing a notice) when the variable is unset or
    empty; the original raised ``TypeError`` in that case. An existing path is
    left untouched.
    """
    temp_chart_dir = os.getenv("IMAGE_MAIN_URL")
    if not temp_chart_dir:
        print("IMAGE_MAIN_URL is not set; skipping chart directory creation.")
        return
    if os.path.exists(temp_chart_dir):
        return
    # exist_ok covers the case where another thread/process creates the
    # directory between the check above and this call.
    os.makedirs(temp_chart_dir, exist_ok=True)
def clean_gradio_tmp_dir():
    """Delete the directory named by ``IMAGE_GRADIO_PATH`` together with its contents.

    Does nothing when the variable is unset/empty (the original raised
    ``TypeError``) or when the directory does not exist. Errors raised while
    deleting are printed and swallowed.
    """
    tmp_dir = os.getenv("IMAGE_GRADIO_PATH")
    if not tmp_dir:
        print("IMAGE_GRADIO_PATH is not set; skipping cleanup.")
        return
    if not os.path.exists(tmp_dir):
        return
    try:
        shutil.rmtree(tmp_dir)
    except Exception as e:
        print(f"Error cleaning up /tmp/gradio/ directory: {e}")
# Define the interface function
# Retry budget for the agent: the answer functions re-invoke the agent while
# its output contains the word "invalid", at most max_iterations times.
max_iterations = 5
# Retry counter; the answer functions declare it global and reset it to 0 at
# the start of every call.
iterations = 0
def handle_query(user_question, chatbot, audio=None):
    """Run one user question through the agent on the worker thread, with stop support.

    The question is submitted to the module-level single-worker ``executor``
    and this function polls once per second until the work finishes or
    ``stop_event`` is set (by ``stop_processing``).

    Args:
        user_question: Text entered by the user.
        chatbot: Chat history, a list of ``(user, bot)`` tuples; it is
            appended to in place.
        audio: Accepted for interface compatibility; not used here.

    Returns:
        ``gr.update(value=chatbot)`` with the new exchange (or an error /
        "already processing" message) appended.
    """
    global current_event, stop_event, executor
    error_message = "Sorry, we encountered an error while processing your request. Please try after some time."

    if current_event and not current_event.done():
        chatbot.append(("","A query is already being processed. Please stop it before starting a new one."))
        return gr.update(value=chatbot)

    # Clear a stale stop request only when a new query is really started, so a
    # rejected submission cannot wipe a pending stop for the running query.
    stop_event.clear()

    # Start the processing in a new thread
    current_event = executor.submit(answer_question_thread, user_question, chatbot)

    # Periodically check if current_event is done
    while not current_event.done():
        if stop_event.is_set():
            # A running future cannot be cancelled; mark it finished so the
            # next query is not rejected as "already being processed".
            try:
                current_event.set_result((user_question, error_message))
            except concurrent.futures.InvalidStateError:
                # The worker finished between the done() check and this call.
                pass
            current_event.cancel()  # Attempt to cancel the current_event
            executor.shutdown(wait=False)  # Abandon the busy worker
            # A shut-down executor rejects every later submit() with
            # RuntimeError, so install a fresh one for subsequent queries.
            executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
            print("Current event cancelled")
            print(current_event.cancelled())
            chatbot.append((user_question, error_message))
            return gr.update(value=chatbot)
        time.sleep(1)  # Wait for 1 second before checking again

    if current_event.cancelled():
        chatbot.append((user_question, error_message))
        return gr.update(value=chatbot)

    try:
        # result() re-raises any exception raised inside the worker.
        user_question1, response_text1 = current_event.result()
        print("output")
        print(user_question1)
        print(response_text1)
        chatbot.append((user_question1, response_text1))
    except Exception as e:
        print(f"Error occurred: {e}")
        chatbot.append((user_question, error_message))
    return gr.update(value=chatbot)
def stop_processing(chatbot):
    """
    Request that the query currently being processed is abandoned.

    Sets ``stop_event`` and attempts to cancel ``current_event`` when a query
    is still running, then appends a notice to the chat history and returns
    ``gr.update(value=chatbot)``.
    """
    global current_event, stop_event
    query_running = bool(current_event) and not current_event.done()
    if query_running:
        stop_event.set()        # Signal the polling loop to stop
        current_event.cancel()  # Attempt to cancel the current_event
    # NOTE(review): the original indentation was lost; the notice is assumed to
    # be appended regardless of whether a query was running - confirm.
    notice = ("Sorry, we encountered an error while processing your request. Please try after some time.","")
    chatbot.append(notice)
    return gr.update(value=chatbot)
# This function is for agent executor invoke with the option of stop
def answer_question_thread(user_question, chatbot,audio=None):
    """Answer one question with the agent; meant to run on the worker thread.

    The agent is re-invoked while its output contains the word "invalid", up
    to ``max_iterations`` times. When the answer mentions the chart file named
    by ``IMAGE_PATH``, the chart is embedded in the reply as a base64 ``<img>``
    tag and, if the question contains an email address, mailed to it.

    Args:
        user_question: Text entered by the user.
        chatbot: Chat history; accepted for interface compatibility, not
            modified here.
        audio: Ignored. Speech transcription is only done by
            ``answer_question``; in the original this variant carried that
            code disabled inside a string literal.

    Returns:
        A ``(user_question, response_text)`` tuple.
    """
    global iterations
    iterations = 0
    response_text = ""

    while iterations < max_iterations:
        response = agent_executor.invoke({"input": user_question}, config={"callbacks": [langfuse_handler]}, early_stopping_method="generate")
        if isinstance(response, dict):
            response_text = response.get("output", "")
        else:
            response_text = response
        if "invalid" not in response_text.lower():
            break
        iterations += 1

    if iterations == max_iterations:
        return user_question , "Sorry, I couldn't complete your request" #"The agent could not generate a valid response within the iteration limit."

    # `None in str` raises TypeError, so an unset IMAGE_PATH means "no chart".
    image_path = os.getenv("IMAGE_PATH")
    if not image_path or image_path not in response_text:
        return user_question, response_text

    # First sentence of the agent answer, used as caption and as mail body.
    summary = response_text.split(".")[0]

    try:
        # Convert the chart to a base64 encoded PNG string
        with Image.open(image_path) as chart:
            buffered = BytesIO()
            chart.save(buffered, format="PNG")
    except Exception as e:
        # In the original this handler was attached to the cleanup `try`
        # below and could never run.
        print(f"Error loading image file: {e}")
        return user_question, "Chart generation failed. Please try again."

    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
    img = f'<img src="data:image/png;base64,{img_str}" style="width:450px; height:400px;">'
    response_text = summary + img

    # Mail the chart only when the question contains an address. The original
    # read `user_email` even when nothing matched and failed with
    # UnboundLocalError for every chart request without an email.
    email_pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'
    match = re.search(email_pattern, user_question)
    if match:
        try:
            send_email_with_attachment_mailjet(
                recipient_email=match.group(),
                subject="Warehouse Inventory Report",
                body=summary + ". This is an auto-generated email containing a chart created using Generative AI.",
                attach_img_base64=img_str)
        except Exception as e:
            # A mail failure must not cost the user the chart itself.
            print(f"Error sending email: {e}")

    if "send email to" in user_question:
        try:
            # Remove the chart file itself; the original passed the HTML
            # snippet to os.remove, which always failed.
            os.remove(image_path)
        except Exception as e:
            print(f"Error cleaning up image file: {e}")

    return user_question, response_text
# without forceful stop option
def answer_question(user_question, chatbot, audio=None):
    """Answer one question with the agent (variant without forceful stop).

    When ``audio`` is given it is transcribed with Google Speech Recognition
    and the transcript replaces ``user_question``. The agent is re-invoked
    while its output contains the word "invalid", up to ``max_iterations``
    times. When the answer mentions the chart file named by ``IMAGE_PATH``,
    the chart is shown as a base64 ``<img>`` tag and, if the question contains
    an email address, mailed to it.

    Args:
        user_question: Text entered by the user.
        chatbot: Chat history, a list of ``(user, bot)`` tuples; appended to in
            place.
        audio: Optional ``(sample_rate, samples)`` pair; ``samples`` must offer
            ``tobytes()`` and ``dtype.itemsize``.

    Returns:
        ``gr.update(value=chatbot)`` on every path. The original returned a
        bare string when the retry limit was reached.
    """
    global iterations
    iterations = 0

    # Handle audio input if provided
    if audio is not None:
        sample_rate, audio_data = audio
        audio_segment = AudioSegment(
            audio_data.tobytes(),
            frame_rate=sample_rate,
            sample_width=audio_data.dtype.itemsize,
            channels=1
        )
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file:
            audio_segment.export(temp_audio_file.name, format="wav")
            temp_audio_file_path = temp_audio_file.name
        try:
            recognizer = sr.Recognizer()
            with sr.AudioFile(temp_audio_file_path) as source:
                audio_content = recognizer.record(source)
            try:
                user_question = recognizer.recognize_google(audio_content)
            except sr.UnknownValueError:
                user_question = "Sorry, I could not understand the audio."
            except sr.RequestError:
                user_question = "Could not request results from Google Speech Recognition service."
        finally:
            # The file was created with delete=False, so remove it explicitly.
            try:
                os.remove(temp_audio_file_path)
            except OSError as e:
                print(f"Error cleaning up audio file: {e}")

    response_text = ""
    while iterations < max_iterations:
        response = agent_executor.invoke({"input": user_question}, config={"callbacks": [langfuse_handler]})
        if isinstance(response, dict):
            response_text = response.get("output", "")
        else:
            response_text = response
        if "invalid" not in response_text.lower():
            break
        iterations += 1

    if iterations == max_iterations:
        chatbot.append((user_question, "The agent could not generate a valid response within the iteration limit."))
        return gr.update(value=chatbot)

    # `None in str` raises TypeError, so an unset IMAGE_PATH means "no chart".
    image_path = os.getenv("IMAGE_PATH")
    if not image_path or image_path not in response_text:
        chatbot.append((user_question, response_text))
        return gr.update(value=chatbot)

    try:
        # Convert the chart to a base64 encoded PNG string
        with Image.open(image_path) as chart:
            buffered = BytesIO()
            chart.save(buffered, format="PNG")
    except Exception as e:
        # In the original this handler was attached to the cleanup `try`
        # below and could never run.
        print(f"Error loading image file: {e}")
        chatbot.append((user_question, "Chart generation failed. Please try again."))
        return gr.update(value=chatbot)

    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
    img = f'<img src="data:image/png;base64,{img_str}" style="width:450px; height:400px;">'
    chatbot.append((user_question, img))

    # Mail the chart only when the question contains an address. The original
    # read `user_email` even when nothing matched (UnboundLocalError) and
    # called the Mailjet helper with a keyword it does not accept.
    email_pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'
    match = re.search(email_pattern, user_question)
    if match:
        try:
            send_email_with_attachment_mailjet(
                recipient_email=match.group(),
                subject="Warehouse Inventory Report",
                body=response_text.split(".")[0],
                attach_img_base64=img_str)
        except Exception as e:
            # A mail failure must not cost the user the chart itself.
            print(f"Error sending email: {e}")

    if "send email to" in user_question:
        try:
            # Remove the chart file itself; the original passed the HTML
            # snippet to os.remove, which always failed.
            os.remove(image_path)
        except Exception as e:
            print(f"Error cleaning up image file: {e}")

    return gr.update(value=chatbot)
def submit_feedback(feedback, chatbot, request: gr.Request):
    """Store the user's feedback for the latest exchange and echo it in the chat.

    Args:
        feedback: Feedback text entered by the user.
        chatbot: Chat history; its last entry supplies the question and the
            response that are stored with the feedback.
        request: Gradio request; ``request.username`` is stored as the author.

    Returns:
        The history extended by a "User feedback: ..." entry, followed by two
        updates with ``visible=False``.
    """
    gr.Info("Thank you for your feedback.")
    last_exchange = chatbot[-1]
    # save feedback with user question and response in database
    save_feedback(request.username, last_exchange[0], last_exchange[1], feedback)
    feedback_entry = ("User feedback: " + feedback, None)
    return chatbot + [feedback_entry], gr.update(visible=False), gr.update(visible=False)
# Function to connect to MySQL database
def connect_to_db():
    """Open and return a new MySQL connection using the module-level DB_* settings."""
    connection_settings = {
        "host": DB_HOST,
        "user": DB_USER,
        "password": DB_PASSWORD,
        "database": DB_NAME,
    }
    return mysql.connector.connect(**connection_settings)
# Function to save feedback to the database
def save_feedback(username, user_question, user_response, feedback):
    """Insert one feedback row into the ``user_feedback`` table.

    Values are passed as query parameters. MySQL errors are printed and
    swallowed; the cursor and the connection are closed in every case.

    Args:
        username: Name of the user giving feedback.
        user_question: The question the feedback refers to.
        user_response: The response the feedback refers to.
        feedback: The feedback text.
    """
    # Pre-bind both names: if connect_to_db() itself fails, the `finally`
    # block would otherwise hit unbound locals and hide the real error.
    conn = None
    cursor = None
    try:
        conn = connect_to_db()
        cursor = conn.cursor()
        query = "INSERT INTO user_feedback (username, question, response, feedback) VALUES (%s, %s, %s, %s)"
        cursor.execute(query, (username, user_question, user_response, feedback))
        conn.commit()
    except mysql.connector.Error as err:
        print(f"Error: {err}")
    finally:
        if cursor:
            cursor.close()
        if conn:
            conn.close()
def handle_dislike(data: gr.LikeData):
    """React to a like/dislike click on a chat message.

    On a dislike, an info toast asks for feedback and both returned updates
    have ``visible=True``; on a like both have ``visible=False``.
    """
    disliked = not data.liked
    if disliked:
        print("downvote")
        gr.Info("Please enter your feedback.")
    else:
        print("upvote")
    return gr.update(visible=disliked), gr.update(visible=disliked)
# greet with user name on successful login
def update_message(request: gr.Request):
    """Return the HTML welcome heading for the logged-in user.

    Args:
        request: Gradio request; ``request.username`` is inserted in the text.

    Returns:
        An ``<h2>`` element as a string. The original closed the heading with
        ``</h4>``, which is invalid markup.
    """
    return f"<h2 style=' font-family: Calibri;'>Welcome, {request.username}</h2>"
# Function to generate a 50-word summary of the newly uploaded doc using OpenAI
def generate_summary(text):
    """Ask the OpenAI chat API for a title and a roughly 50-word summary of ``text``.

    The reply is expected to contain a ``**Title:**`` line and a
    ``**Summary:**`` line. Parsing is tolerant: replies that are shorter or
    laid out differently no longer raise ``IndexError``.

    Args:
        text: The document text (whatever is passed is formatted into the
            prompt with an f-string).

    Returns:
        A ``(title, summary)`` tuple of stripped strings; ``summary`` is empty
        when the reply contains nothing that can serve as one.
    """
    prompt = (
        "You are an AI that helps with document analysis. Please provide a concise title and a summary of the following document. "
        "The summary should be about 50 words and include key details that can help answer questions accurately:\n\n"
        f"{text}\n\nTitle : Summary"
    )
    # Call the OpenAI API to generate a summary
    response = openai.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": prompt,
            }
        ],
        model="gpt-4o-mini",
    )
    # The message content may be None; treat that as an empty reply.
    response_content = response.choices[0].message.content or ""
    lines = response_content.split("\n")

    # Extract title
    title = lines[0].split("**Title:**")[-1].strip()

    # Extract summary: prefer the line carrying the marker, then the position
    # the original relied on (third line), then the first non-empty line after
    # the title.
    summary_line = next((line for line in lines if "**Summary:**" in line), None)
    if summary_line is None:
        if len(lines) > 2 and lines[2].strip():
            summary_line = lines[2]
        else:
            summary_line = next((line for line in lines[1:] if line.strip()), "")
    summary = summary_line.split("**Summary:**")[-1].strip()
    return title, summary
# Handle a file upload: decide whether a PDF or an Excel file was uploaded so that
# the matching tool can be created at runtime with the appropriate prompts.
def upload_file(filepath):
    """Process an uploaded file according to its extension.

    * ``.pdf``: the text is split and indexed into the global ``vector_store1``
      and a title/summary is generated.
    * ``.xlsx``: handling is delegated to ``process_excel``.

    The extension is compared case-insensitively, so ``REPORT.PDF`` is handled
    like ``report.pdf`` (the original ignored such files). The normalized
    extension is stored in the global ``file_extension``.

    Args:
        filepath: Path of the uploaded file.

    Returns:
        ``(title, summary, file_extension)`` for a PDF, ``(title, prompt)`` for
        an Excel file, ``None`` for any other extension.
    """
    global vector_store1, file_extension
    # Get the file extension
    _, file_extension = os.path.splitext(filepath)
    file_extension = file_extension.lower()

    if file_extension == ".pdf":
        # NOTE(review): texts1 is local here; the module-level texts1 is not
        # updated - confirm whether that is intended.
        texts1 = load_and_split_pdf(filepath)
        vector_store1 = create_vector_store(texts1)
        # Generate a 50-word summary from the extracted text
        title, summary = generate_summary(texts1)
        return title, summary, file_extension

    if file_extension == ".xlsx":
        title, prompt = process_excel(filepath)
        return title, prompt

    return None
def generate_example_questions(sheet_name, column_headers):
    """
    Build example natural-language questions that fit a sheet's columns.

    Args:
        sheet_name (str): The name of the Excel sheet, inserted in the questions.
        column_headers (list): List of column headers from the sheet.

    Returns:
        questions (list): One question per recognised column group, in a fixed order.
    """
    # (columns that trigger the question, question text) in output order.
    candidates = [
        (('Product Name', 'Product'), f"What is the total sales for a specific product in {sheet_name}?"),
        (('Sales Amount', 'Amount'), f"What is the total sales amount for a specific region in {sheet_name}?"),
        (('Region',), f"Which region had the highest sales in {sheet_name}?"),
        (('Date',), f"What were the total sales during a specific month in {sheet_name}?"),
        (('Price',), f"What is the price of a specific product in {sheet_name}?"),
    ]
    questions = [
        text
        for trigger_columns, text in candidates
        if any(column in column_headers for column in trigger_columns)
    ]
    # Any header matching the 'Employee*' pattern adds the broker question.
    for header in column_headers:
        if fnmatch.fnmatch(header, 'Employee*'):
            questions.append(f"What are the details of the distinct broker names?")
            break
    return questions
def generate_prompt_from_excel_file(df_dict):
    """Build the tool prompt describing every sheet of an uploaded Excel workbook.

    Args:
        df_dict (dict): Mapping of sheet name to that sheet's DataFrame.

    Returns:
        tuple: ``("Excel data", prompt)`` where ``prompt`` lists the column headers
        of each sheet followed by the expected query and output formats.
    """
    parts = ["You have been provided with an Excel file containing data in several sheets.\n"]

    for sheet_name, sheet_df in df_dict.items():
        # Headers may be non-string labels; str() keeps the join from raising.
        column_headers = ', '.join(str(column) for column in sheet_df.columns)
        parts.append(f"For the sheet '{sheet_name}', the column headers are:")
        parts.append(f"{column_headers}\n\n")

    # NOTE(review): docstatus and sample_table are not defined in this function;
    # they are resolved from module scope when the lines below run — confirm they exist.
    parts.append("- Query: A natural language question (e.g., List all the employees with broker name ADP or Alerus). The question should be sent as 'What are the employee details with broker name ADP or Alerus :'.")
    parts.append(f"Output : {docstatus}. Here is the sample table:\n{sample_table}.\n")
    parts.append("- Query: A natural language question with request to create LOA document (e.g., can you create LOA document for all the employees with broker name ADP or Alerus). The question should be sent as 'What are the employee details with broker name ADP or Alerus : LOA document'.")
    parts.append(f"Output: {docstatus}. Here is the sample table:\n{sample_table}.\nIf there is any error, please display the message returned by the function as response. ")
    return "Excel data", "".join(parts)
# Function to handle "Add to RedMindGPT" button click
def add_to_redmindgpt(title, summary):
    """Register the uploaded file as a runtime tool and rebuild the agent executor.

    Parameters:
        title (str): The title of the document or Excel file.
        summary (str): A brief summary of the document or Excel file.

    Returns:
        str: A success message when the stored ``file_extension`` is ".pdf" or
        ".xlsx"; ``None`` for any other extension (nothing is registered).

    Behavior:
        - ".pdf": creates the ``runtimeDocumentData`` tool backed by ``document_data_tool_runtime``.
        - ".xlsx": creates the ``runtimeExcelData`` tool backed by ``chat_with_excel_data_dataframe``.
        - The new tool is inserted at the front of ``tools`` and ``agent_executor``
          is rebuilt with a prompt that embeds the tool's summary line.
    """
    global agent_executor, file_extension

    if file_extension == ".pdf":
        runtime_summary = f"For {title} document related questions, Please refer runtimeDocumentData tool. {summary}. Please provide a complete and concise response within 200 words and Ensure that the response is not truncated and covers the essential points."
        runtime_tool = StructuredTool(
            func=document_data_tool_runtime,
            name="runtimeDocumentData",
            args_schema=QueryInput,
            output_schema=QueryOutput,
            description=f"You are an AI assistant trained to help with the questions based on the uploaded document {title}. {summary}. Please provide a complete and concise response within 200 words and Ensure that the response is not truncated and covers the essential points."
        )
    elif file_extension == ".xlsx":
        runtime_summary = f"For {title} related questions, Please refer runtimeExcelData tool. {summary}. Display the response only in the format as mentioned in the tool description. "
        runtime_tool = StructuredTool(
            func=chat_with_excel_data_dataframe,
            name="runtimeExcelData",
            args_schema=QueryInput,
            output_schema=QueryOutput,
            description=f"""You are an AI assistant trained to handle Excel data and return meaningful insights. If user query is given with an option of generating the document with the result set dataframe, pass two inputs to the tool. First input is the user query and the second input will be the phrase "create document". display the response only in the below format.
{docstatus}. Here is the sample data:
{sample_table}.
Please provide the total rows count from the {total_rows} values returned by the function and not the count of sample table rows. If there is any error, please display the message returned by the function as response. """
        )
    else:
        # Unknown file type: nothing to register.
        return None

    # The newest runtime tool goes first in the tool list.
    tools.insert(0, runtime_tool)
    # Both file types share this template; only the runtime summary line differs.
    prompt_template = f"""You are an assistant that helps with database queries, API information, and document retrieval. Your job is to provide clear, complete, and detailed responses to the following queries. Please give the output response in an user friendly way and remove "**" from the response. For example, document related queries can be answered in a clear and concise way with numbering and not as a paragraph. Database related queries should be answered with proper indentation and use numbering for the rows. ASN id related queries should be answered with proper indentation and use numbering for the rows.
{runtime_summary}
For ASN id related questions, if the user specifies an ASN id, provide the information from the api tool. Pass only the id as input to the tool. Do not pass the entire question as input to the tool. If the details are not found, say it in a clear and concise way.
You are an AI assistant trained to help with warehouse management questions based on a detailed document about our WMS. The document covers various processes such as ASN handling, purchase orders, cross docking, appointment scheduling for shipments, and yard management. Please provide a complete and concise response within 200 words and Ensure that the response is not truncated and covers the essential points. When answering, focus on providing actionable insights and clear explanations related to the specific query. Please remove "**" from the response.
For SQL database-related questions, only use the fields available in the warehouse schema, including tables such as customer_master, efs_company_master, efs_group_company_master, efs_region_master, party_address_detail, wms_warehouse_master.
For datavisualization, user will ask for inventory report of a particular warehouse. Your job is to return the image path to chat interface and display the image as output.
{{agent_scratchpad}}
Here is the information you need to process:
Question: {{input}}"""
    agent_executor = bind_llm(llm, tools, prompt_template)
    return "File has been added successfully."
def process_excel(file):
    """Load an uploaded Excel workbook and build the prompt describing it.

    The first sheet is stored in the module-level ``excel_dataframe`` for later
    questions; all sheets are used to build the prompt.

    Args:
        file: The uploaded file. Either an object exposing the path as ``.name``
            or a plain path string; ``None`` when nothing was uploaded.

    Returns:
        tuple: ``(title, prompt)`` from ``generate_prompt_from_excel_file``, or a
        fixed title and an explanatory message when ``file`` is ``None``.
    """
    global excel_dataframe
    if file is None:
        return "Excel file", "Your excel does not have values. Please upload a different file."

    # Accept both upload objects (path in ``.name``) and plain path strings.
    excel_path = getattr(file, "name", file)
    # Parse the workbook once: sheet_name=None yields every sheet keyed by name.
    all_sheets = pd.read_excel(excel_path, sheet_name=None)
    title, prompt = generate_prompt_from_excel_file(all_sheets)
    # The first sheet is what a default read_excel() call returns, so reuse it
    # instead of reading the file a second time.
    excel_dataframe = next(iter(all_sheets.values()))
    return title, prompt
def chat_with_excel_data(question):
    """Send ``question`` about the loaded Excel data to ``chat_with_llm``.

    Reads the module-level ``excel_dataframe``, prints the answer and returns it
    unchanged.
    """
    answer = chat_with_llm(excel_dataframe, question)
    print(answer)
    return answer
def chat_with_excel_data_dataframe(question):
    """Answer an Excel question and format the result for the chat interface.

    Args:
        question (str): The user's question. If it contains "LOA", documents are
            requested for the result rows.

    Returns:
        Whatever ``handle_large_dataset`` returns for the result DataFrame.
    """
    print(f"question for excel data frame : {question}")
    create_document = "LOA" in question
    print(f"create document : {create_document}")

    response_dataframe = chat_with_excel_data(question)
    isDataFrame = isinstance(response_dataframe, pd.DataFrame)
    if not isDataFrame:
        print("The result is not a DataFrame.")
        # Always convert to a one-column DataFrame so the downstream handler
        # never receives a bare string or scalar.
        response_text = str(response_dataframe)
        if ":" in response_text:
            # "label: a, b, c" -> one row per comma-separated value after the colon.
            values = [value.strip() for value in response_text.split(":", 1)[1].split(",")]
        else:
            values = [response_text]
        response_dataframe = pd.DataFrame(values, columns=["Result"])

    # Build the preview table and download information.
    return handle_large_dataset(response_dataframe, create_document, isDataFrame)
def save_file_to_hostinger(save_file_path):
    """Upload a local file to the FTP server as ``/RedMindGPT/output.xlsx``.

    Args:
        save_file_path (str): Path of the local file to upload.

    Raises:
        ftplib.all_errors / OSError: Propagated from the connection, login,
            file open or upload; the FTP connection is closed in every case.
    """
    from ftplib import FTP

    # SECURITY NOTE(review): credentials are hard-coded below and plain FTP sends
    # them unencrypted. The environment overrides allow moving them out of the
    # source; the literals remain only as backward-compatible defaults and
    # should be rotated and removed.
    ftp_host = os.getenv('HOSTINGER_FTP_HOST', 'ftp.redmindtechnologies.com')
    ftp_user = os.getenv('HOSTINGER_FTP_USER', 'u852023448.redmindGpt')
    ftp_pass = os.getenv('HOSTINGER_FTP_PASS', 'RedMind@505')
    remote_file_path = '/RedMindGPT/output.xlsx'

    # The context manager closes the connection even if login or upload fails.
    with FTP(ftp_host) as ftp:
        ftp.login(ftp_user, ftp_pass)
        with open(save_file_path, 'rb') as file:
            ftp.storbinary(f'STOR {remote_file_path}', file)
        print(f'File {save_file_path} uploaded to {remote_file_path} on server.')
def handle_large_dataset(df, create_document,isDataFrame):
    """Save the full result set, upload it, and return a small preview table.

    Args:
        df (pandas.DataFrame): The complete result set.
        create_document (bool): When true, a PDF is created for every row using
            its 'Account Name' and 'Account ID' columns.
        isDataFrame (bool): Kept for interface compatibility; the serial-number
            column is built the same way in both cases.

    Returns:
        tuple | str: ``(sample_table, docstatus)`` for results under 4000 rows,
        where ``sample_table`` is a markdown preview and ``docstatus`` the
        download/status text; otherwise a message that the result is too large.
    """
    total_rows = len(df)
    print(f"Total rows: {total_rows}")
    if total_rows >= 4000:
        return "Your query returns a large dataset which is not supported in the current version. Please try a different query."

    docstatus = f"Download the complete dataset <a href='https://redmindtechnologies.com/RedMindGPT/output.xlsx' download> here.</a>.There are total of {total_rows} rows."

    # Preview: first 3 rows without the original first column for multi-column
    # results, first 20 rows for single-column results. Copy so that adding the
    # serial-number column never touches (a view of) the caller's frame.
    if len(df.columns) > 1:
        preview = df.head(3).iloc[:, 1:].copy()
    else:
        preview = df.head(20).copy()
    preview.insert(0, 'SNo', range(1, len(preview) + 1))

    # Save the full dataset next to the app so it can be uploaded.
    current_folder = os.getcwd()
    file_path = os.path.join(current_folder, 'output_data.xlsx')
    df.to_excel(file_path, index=False)
    print(f"Files in persistent storage: {os.listdir(current_folder)}")
    print(f"The current folder is: {current_folder}")

    # Imported under an alias: the module defines its own upload_file().
    from huggingface_hub import hf_hub_url
    from huggingface_hub import upload_file as hf_upload_file
    repo_id = "Redmind/NewageNXTGPT"
    hf_upload_file(path_or_fileobj=file_path, repo_id=repo_id, path_in_repo="output/output.xlsx")
    # Previously printed without being assigned in this function.
    download_url = hf_hub_url(repo_id=repo_id, filename="output/output.xlsx")
    print(download_url)
    print(f"Download the file here: {download_url}")

    sample_table = preview.to_markdown()
    if create_document:
        # One PDF per row, filled with the account name and account id.
        for _, row in df.iterrows():
            create_pdf(row['Account Name'], row['Account ID'])
        docstatus += f" {total_rows} documents are created successfully."
    print(sample_table)
    return sample_table, docstatus
def create_pdf(name,id):
    """Fill the LOA PDF template for one account and save it under ``documents``.

    Args:
        name: Written to the "Title of Account" and "Print Name and Title"
            fields; also used as the output file name.
        id: Written to the "Account Number" field.

    Returns:
        str: A message naming the created file.
    """
    filled = FormWrapper("Goldman_LOA - Gold.pdf").fill(
        {
            "Title of Account": name,
            "Account Number": id,
            "Print Name and Title": name
        },
    )
    # Build the path with the platform separator: a hard-coded backslash creates
    # a file literally named "documents\<name>.pdf" on non-Windows systems.
    os.makedirs("documents", exist_ok=True)
    output_file_name = os.path.join("documents", f"{name}.pdf")
    with open(output_file_name, "wb") as output:
        output.write(filled.read())
    return f"{output_file_name} is created successfully."
# Custom stylesheet passed to gr.Blocks(css=...) when the UI is built.
# The UI code references these classes: gr-button and gr-buttonbig (buttons, via
# elem_classes), chatbot_gpt (chatbot height) and logout-link (logout markup).
# The remaining selectors are not referenced by the visible UI code.
css = """
/* Example of custom button styling */
.gr-button {
background-color: #6366f1; /* Change to your desired button color */
color: white;
border-radius: 8px; /* Make the corners rounded */
border: none;
padding: 10px 20px;
font-size: 12px;
cursor: pointer;
}
.gr-button:hover {
background-color: #8a92f7; /* Darker shade on hover */
}
.gr-buttonbig {
background-color: #6366f1; /* Change to your desired button color */
color: white;
border-radius: 8px; /* Make the corners rounded */
border: none;
padding: 10px 20px;
font-size: 14px;
cursor: pointer;
}
.gr-buttonbig:hover {
background-color: #8a92f7; /* Darker shade on hover */
}
/* Customizing the Logout link to be on the right */
.logout-link {
text-align: right;
display: inline-block;
width: 100%;
}
.logout-link a {
color: #4A90E2; /* Link color */
text-decoration: none;
font-size: 16px;
}
.chatbot_gpt {
height: 600px !important; /* Adjust height as needed */
}
.logout-link a:hover {
text-decoration: underline; /* Underline on hover */
}
.message-buttons-right{
display: none !important;
}
body, .gradio-container {
margin: 0;
padding: 0;
}
/* Styling the tab header with a blue background */
.gr-tab-header {
background-color: #4A90E2; /* Blue background for the tab header */
padding: 10px;
border-radius: 8px;
color: white;
font-size: 16px;
}
/* Styling the selected tab text color to be green */
.gr-tab-header .gr-tab-active {
color: green; /* Change selected tab text to green */
}
/* Keep non-selected tab text color white */
.gr-tab-header .gr-tab {
color: white;
}
/* Custom CSS for reducing the size of the video element */
.video-player {
width: 500px; /* Set a custom width for the video */
height: 350px; /* Set a custom height for the video */
margin: 0 auto; /* Center the video horizontally */
}
"""
# Build the Gradio interface: header, sample-question buttons, chatbot, question
# box with feedback controls, file-upload controls, then the event wiring.
with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
gr.HTML("<CENTER><B><h1 style='font-size:30px; font-family: Calibri;'>RedMindGPT</h1></B></CENTER>")
# Logout link styled as text link in the right corner
gr.Markdown("<div class='logout-link'><a href='/logout'><b>Logout</b></a></div>")
# Unified RedMindGPT Interface
with gr.Row():
# Markdown area filled by update_message when the page loads.
m = gr.Markdown()
demo.load(update_message, None, m)
# Buttons for sample queries
with gr.Row():
sample_button = gr.Button("What are the details of ASN24091600002", elem_classes="gr-buttonbig")
sample_button1 = gr.Button("What are the active warehouses available", elem_classes="gr-buttonbig")
sample_button2 = gr.Button("Explain Pre-Receiving Yard Management", elem_classes="gr-buttonbig")
sample_button3 = gr.Button("Can you generate a pie chart with item names and quantities in warehouse WH1000001", elem_classes="gr-buttonbig")
sample_button4 = gr.Button("Analyze item name & quantity for different customers in a stacked bar chart for the warehouse WH1000001 & send email to [email protected]", elem_classes="gr-button")
# Chatbot component
with gr.Row():
chatbot = gr.Chatbot(label="Select any of the questions listed above to experience RedMindGPT in action.", elem_classes="chatbot_gpt")
# Textbox for user questions
with gr.Row():
with gr.Column(scale=1):
message = gr.Textbox(show_label=False, container=False, placeholder="Please enter your question")
with gr.Row():
# Feedback controls start hidden; they are outputs of the chatbot.like handler below.
feedback_textbox = gr.Textbox(visible=False, show_label=False, container=False, placeholder="Please enter your feedback.")
submit_feedback_button = gr.Button("Submit Feedback", visible=False, elem_classes="gr-buttonbig")
with gr.Column(scale=1):
with gr.Row():
button = gr.Button("Submit", elem_id="submit", elem_classes="gr-buttonbig")
stop_button = gr.Button("Stop", elem_classes="gr-buttonbig")
# Rearranged to place Upload Doc and Upload Excel in the same row
with gr.Row():
with gr.Column(scale=1):
# File Upload Section
gr.Markdown("**Add a document or Excel for natural language interaction.**")
with gr.Column(scale=1):
u = gr.UploadButton("Upload a doc/excel", file_count="single", elem_classes="gr-buttonbig")
#excel_file = gr.UploadButton("Upload an excel", file_count="single", elem_classes="gr-buttonbig", file_types=[".xlsx", ".xls"])
with gr.Column(scale=1):
# Hidden until a file has been uploaded (made visible by the u.upload handler below).
add_button = gr.Button("Add to RedMindGPT", elem_classes="gr-buttonbig", visible=False)
with gr.Row():
# Title and summary boxes are hidden until an upload; upload_file fills them.
title_textbox = gr.Textbox(label="Title", visible=False)
summary_textarea = gr.Textbox(label="Summary", lines=5, visible=False)
output_message = gr.Markdown() # Markdown to display output message
# NOTE(review): success_message is not used by any handler registered below.
success_message = gr.Markdown() # Placeholder for messages
# Moved function calling lines to the end
# NOTE(review): several lambdas below declare one parameter (x or _) but are
# registered with no inputs (None or []); confirm they do not fail with a
# missing-argument TypeError when the event fires.
stop_button.click(stop_processing, [chatbot], [chatbot])
# Submitting a question (button or Enter) runs handle_query and then clears the textbox.
button.click(handle_query, [message, chatbot], [chatbot])
message.submit(handle_query, [message, chatbot], [chatbot])
message.submit(lambda x: gr.update(value=""), None, [message], queue=False)
button.click(lambda x: gr.update(value=''), [], [message])
# Like/dislike on a chatbot message updates the feedback controls via handle_dislike.
chatbot.like(handle_dislike, None, outputs=[feedback_textbox, submit_feedback_button])
submit_feedback_button.click(submit_feedback, [feedback_textbox, chatbot], [chatbot, feedback_textbox, submit_feedback_button])
submit_feedback_button.click(lambda x: gr.update(value=''), [], [feedback_textbox])
# Each sample button is passed as an input to handle_query alongside the chatbot.
sample_button.click(handle_query, [sample_button, chatbot], [chatbot])
sample_button1.click(handle_query, [sample_button1, chatbot], [chatbot])
sample_button2.click(handle_query, [sample_button2, chatbot], [chatbot])
sample_button3.click(handle_query, [sample_button3, chatbot], [chatbot])
sample_button4.click(handle_query, [sample_button4, chatbot], [chatbot])
# Upload: fill title/summary from upload_file, then reveal the title, summary and add button.
# NOTE(review): upload_file returns three values for PDFs but only two outputs are listed here.
u.upload(upload_file, u, [title_textbox, summary_textarea])
u.upload(lambda _: (gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)), None, [title_textbox, summary_textarea, add_button])
# Add: register the file through add_to_redmindgpt, then hide the upload fields again.
add_button.click(add_to_redmindgpt, [title_textbox, summary_textarea], output_message)
add_button.click(lambda _: (gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)), None, [title_textbox, summary_textarea, add_button])
# NOTE(review): login credentials are hard-coded in source; consider loading them from the environment.
demo.launch(auth=[("lakshmi", "redmind"), ("arun", "redmind"), ("NewageGlobal", "Newage123$")], auth_message="RedMindGPT", inline=False)