# app.py — Hugging Face Spaces entry point (commit 3d52a96)
import gradio as gr
import threading
from email.header import decode_header
import mysql.connector
from transformers import pipeline # Assuming you'll use Hugging Face pipeline
import email, imaplib, json, time
import logging
# Configure logging: timestamped INFO-level output for the whole app.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
# Email and database configuration
# NOTE(review): credentials are hard-coded in plaintext. Move EMAIL_ADDRESS,
# PASSWORD and DB_CONFIG['password'] into environment variables / a secrets
# store before deploying — this value is already exposed in version control.
IMAP_SERVER = 'imap.gmail.com'
EMAIL_ADDRESS = '[email protected]'
PASSWORD = 'gclc wsnx kywt uvqy' # Store this securely in production
# NOTE(review): host/port look like a temporary ngrok TCP tunnel — they will
# change whenever the tunnel restarts; confirm how this is kept up to date.
DB_CONFIG = {
'host': '0.tcp.in.ngrok.io',
'port': 11329,
'user': 'root',
'password': '', # Add the correct password
'database': 'shipment_details'
}
# JSON format for extracted shipment details.
# NOTE(review): not referenced anywhere in this file — presumably documents the
# schema expected from the LLM (see `prompt` below); verify before removing.
output_format = {
"origin": "",
"destination": "",
"expected_shipment_datetime": "",
"types_of_service": "",
"warehouse": "",
"description": "",
"quantities": "",
"carrier_details": ""
}
# Prompt for LLM to process shipment-related emails.
# Currently unused by `get_details()` (which returns a mock response); kept for
# the real pipeline integration.
prompt = """
System prompt: You will be provided with an email containing shipment details. Your task is to extract specific information based on the given instructions.
Instructions:
1. Focus only on extracting details about future shipments, ignore irrelevant information.
2. Output should be in JSON format. Missing information should be marked as null.
3. Extract the following:
- origin
- destination
- expected_shipment_datetime (format: yyyy-mm-dd hh:mm:ss)
- types_of_service (AIR, LCL, FCL)
- warehouse
- description (max 100 words)
- quantities
- carrier_details
4. The output should be formatted as follows:
{
"origin": "",
"destination": "",
"expected_shipment_datetime": "",
"types_of_service": "",
"warehouse": "",
"description": "",
"quantities": "",
"carrier_details": ""
}
"""
# Function to insert extracted shipment details into MySQL database
def insert_data(extracted_details):
    """Persist one extracted shipment record into the `shipment_details` table.

    Args:
        extracted_details: dict holding the shipment fields produced by
            `get_details()` plus email metadata keys ('sender', 'receiver',
            'cc', 'bcc', 'subject'). Missing keys are inserted as NULL.

    The insert is skipped entirely when every shipment field is empty or None.
    All errors are logged rather than raised so one bad record cannot stop the
    polling loop.
    """
    # Shipment fields extracted by the LLM; email metadata columns follow.
    required_fields = [
        'origin', 'destination', 'expected_shipment_datetime',
        'types_of_service', 'warehouse', 'description',
        'quantities', 'carrier_details'
    ]
    # Check for an empty record BEFORE connecting — the original opened a DB
    # connection even when there was nothing to insert.
    if all(extracted_details.get(field) in ["", None] for field in required_fields):
        logger.info("Skipping insertion: All extracted values are empty.")
        return
    mydb = None
    cursor = None
    try:
        mydb = mysql.connector.connect(**DB_CONFIG)
        cursor = mydb.cursor()
        # Parameterized query — values are never interpolated into the SQL text.
        sql = """
        INSERT INTO shipment_details (
        origin, destination, expected_shipment_datetime, types_of_service,
        warehouse, description, quantities, carrier_details,
        sender, receiver, cc, bcc, subject
        ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        """
        metadata_fields = ['sender', 'receiver', 'cc', 'bcc', 'subject']
        values = tuple(extracted_details.get(f) for f in required_fields + metadata_fields)
        cursor.execute(sql, values)
        mydb.commit()
        logger.info("Data inserted successfully.")
    except mysql.connector.Error as db_err:
        logger.error(f"Database error: {db_err}")
    except Exception as ex:
        logger.error(f"Error inserting data: {ex}")
    finally:
        # Fix: the original leaked the cursor and connection on every call.
        if cursor is not None:
            cursor.close()
        if mydb is not None:
            mydb.close()
# Function to extract shipment details using an LLM
def get_details(mail):
    """Return shipment details extracted from *mail* as a JSON string.

    Args:
        mail: raw plain-text body of the email to analyze (currently unused —
            a fixed placeholder record is returned until the real LLM pipeline
            is wired in; see the commented pipeline calls below).

    Returns:
        JSON-encoded dict of shipment fields, or None if generation fails.
    """
    try:
        # Real integration point (Hugging Face pipeline), kept for reference:
        # pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
        # output = pipe(f"{prompt}\n{mail}", max_new_tokens=200)
        placeholder_fields = (
            ("origin", "New York"),
            ("destination", "Los Angeles"),
            ("expected_shipment_datetime", "2024-10-20 12:00:00"),
            ("types_of_service", "AIR"),
            ("warehouse", "Warehouse 1"),
            ("description", "Electronics shipment"),
            ("quantities", "10"),
            ("carrier_details", "Carrier XYZ"),
        )
        # Mock response for testing until the model call above is enabled.
        return json.dumps(dict(placeholder_fields))
    except Exception as ex:
        logger.error(f"Error generating details from LLM: {ex}")
        return None
# Function to read and process unread emails
def read_email():
    """Fetch all UNSEEN messages from the Gmail inbox and process each one.

    For every unread email: extract metadata (From/To/Cc/Bcc/Subject) and the
    plain-text body, run `get_details()` on the body, merge the metadata into
    the extracted record and hand it to `insert_data()`. Per-message failures
    are logged and skipped so one malformed email cannot abort the batch.
    """
    logger.info('Ready to read email...')
    mail = None
    try:
        logger.info('Connecting to IMAP server...')
        mail = imaplib.IMAP4_SSL(IMAP_SERVER)
        mail.login(EMAIL_ADDRESS, PASSWORD)
        logger.info('Logged in.')
        mail.select('inbox')
        logger.info('Selected inbox')
        status, messages = mail.search(None, 'UNSEEN')
        message_ids = messages[0].split()
        logger.info(f"Total unread emails: {len(message_ids)}")
        for message_id in message_ids:
            try:
                status, data = mail.fetch(message_id, '(RFC822)')
                raw_email = data[0][1]
                email_message = email.message_from_bytes(raw_email)
                # Extract metadata
                sender = email_message['From']
                receiver = email_message['To']
                cc = email_message.get('Cc', '')
                bcc = email_message.get('Bcc', '')
                subject = email_message['Subject']
                # Extract email body. Fix: `email_body` was unbound when a
                # multipart message had no text/plain part, and
                # get_payload(decode=True) can return None.
                email_body = ''
                if email_message.is_multipart():
                    for part in email_message.walk():
                        if part.get_content_type() == 'text/plain':
                            payload = part.get_payload(decode=True)
                            if payload is not None:
                                email_body = payload.decode('utf-8', errors='replace')
                            break
                else:
                    payload = email_message.get_payload(decode=True)
                    if payload is not None:
                        email_body = payload.decode('utf-8', errors='replace')
                if not email_body:
                    logger.info(f"Skipping email {message_id}: no text/plain body found.")
                    continue
                # Extract and store details; get_details() returns None on failure.
                extracted_details_str = get_details(email_body)
                if extracted_details_str is None:
                    logger.error(f"Skipping email {message_id}: detail extraction failed.")
                    continue
                extracted_details = json.loads(extracted_details_str)
                meta_data = {
                    'sender': sender, 'receiver': receiver, 'cc': cc, 'bcc': bcc, 'subject': subject
                }
                extracted_details.update(meta_data)
                insert_data(extracted_details)
            except Exception as e:
                logger.error(f"Error processing email {message_id}: {e}")
    except Exception as e:
        logger.error(f"Error reading emails: {e}")
    finally:
        # Fix: the original only closed/logged out on the success path,
        # leaking the IMAP connection whenever an error occurred.
        if mail is not None:
            try:
                mail.close()
                mail.logout()
            except Exception:
                pass
# Shared state between the Gradio callbacks and the background worker thread.
running = False
loop_thread = None
# Background worker: polls the inbox while `running` stays set
def email_processing_loop():
    """Repeatedly call read_email() every 10 seconds until stopped."""
    global running
    logger.info("Starting email processing loop...")
    while running:
        logger.info("Processing emails...")
        read_email()
        logger.info('waiting !...')
        time.sleep(10)  # Simulating a process that checks every 10 seconds
# Start the email processing loop
def start_processing():
    """Spawn the daemon worker thread (no-op if already running)."""
    global running, loop_thread
    if running:
        return update_status()
    running = True
    loop_thread = threading.Thread(target=email_processing_loop, daemon=True)
    loop_thread.start()
    return update_status()
# Stop the email processing loop
def stop_processing():
    """Signal the worker loop to exit after its current iteration."""
    global running
    running = False
    return update_status()
# Update the status of email processing
def update_status():
    """Return a human-readable status string for the UI."""
    if running:
        return "Running"
    return "Stopped"
# Create Gradio interface: a status readout plus start/stop controls for the
# background email-polling loop.
with gr.Blocks() as demo:
    gr.Markdown("# Email Processing")
    # Read-only textbox; initialized with the current loop state ("Stopped").
    status_display = gr.Textbox(label="Email Processing Status", value=update_status(), interactive=False)
    start_button = gr.Button("Start Processing")
    stop_button = gr.Button("Stop Processing")
    # Link buttons to processing functions; both callbacks return the new
    # status string, which Gradio writes back into `status_display`.
    start_button.click(fn=start_processing, outputs=status_display)
    stop_button.click(fn=stop_processing, outputs=status_display)
    # Add a manual timer without the interval parameter
    # gr.Timer.every(2, fn=update_status, outputs=status_display) # Run `update_status()` every 2 seconds
if __name__ == "__main__":
    logging.info('Starting project...')
    demo.launch()