Spaces:
Sleeping
Sleeping
import os | |
import json | |
from flask import Flask, jsonify, request, send_file, send_from_directory | |
from langchain_core.messages import HumanMessage | |
from langchain_google_genai import ChatGoogleGenerativeAI | |
import assemblyai as aai | |
from dotenv import load_dotenv | |
# Load environment variables from the .env file
load_dotenv()

# Initialize the Flask app
app = Flask(__name__)


def _require_env(name):
    """Return the whitespace-stripped value of environment variable *name*.

    Raises:
        RuntimeError: if the variable is unset or blank. Failing here gives a
            message that names the missing key instead of the opaque
            ``AttributeError`` that ``os.getenv(name).strip()`` produces when
            the variable is absent.
    """
    value = (os.getenv(name) or "").strip()
    if not value:
        raise RuntimeError(f"Missing required environment variable: {name}")
    return value


# Get API keys from environment variables
ASSEMBLYAI_API_KEY = _require_env("ASSEMBLYAI_API_KEY")
GOOGLE_API_KEY = _require_env("GOOGLE_API_KEY")

# Set AssemblyAI API key
aai.settings.api_key = ASSEMBLYAI_API_KEY

# Set Google API key for Gemini model (written back stripped of whitespace)
os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY

# Define a directory to save uploaded audio files
UPLOAD_FOLDER = 'uploads'
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
# Refined instructions for Gemini.
# This text is sent verbatim to the model ahead of the call transcription, and
# the reply is scanned for the labels "option 1" / "option 2" / "option 3", so
# every option the parser can return must also be offered to the model here.
GEMINI_INSTRUCTIONS = """
The purpose of this classification is to determine whether a caller has agreed to visit the dealership at a specific time or within a certain time range.
### Option 1: (Yes, at a specific time or range of time within 1 hour):
A call falls into this category if the caller explicitly agrees to visit the dealership at a specific time or within a one-hour range.
#### Criteria:
- Caller explicitly mentions a specific time (e.g., "I'll be there at 4:00").
- Caller gives an estimated arrival time within a one-hour range (e.g., "I'll be there in 30 minutes").
- Caller provides a loose time range, but the agent confirms a firm time (e.g., Caller: "Between 6:00 and 8:00," Agent: "I'll put you down for 7:00").
- Caller is already on the dealership lot (walk-in).
#### Examples (Accepted for this Option):
- "I'm going to come in and take a look at 4:00."
- "I'll stop by around 3:00 on Saturday."
- "I'll try to go test drive it at noon."
- "I'm on my way right now, and I'll be there in just a few minutes."
- "I'll come down in 30 minutes if it's still on the lot."
- "I'll be there in about 45-60 minutes."
#### Not Considered Specific (Excluded from this Option):
- "I'll come after 4:00" (since the arrival time is unknown, it could be anytime after 4 PM).
- "I'll be at the dealership till 4 PM" (caller could arrive anytime before 4 PM).
- "I'll be there between 7:00 and 9:00" (exceeds a one-hour range).
---
### Option 2: (Yes, at a loose time or range of time exceeding 1 hour):
A call falls into this Option if the caller expresses an intention to visit but does not provide a specific or one-hour time range.
#### Criteria:
- Caller agrees to visit but gives a broad time frame (longer than one hour).
- Caller uses vague terms such as "sometime today" or "this evening."
- Caller mentions visiting based on a conditional factor (e.g., "If I get off work early, I'll stop in").
#### Examples (Accepted for this category):
- "I'll stop by sometime today."
- "My wife will go there Saturday."
- "I might be there to test drive it this evening."
- "I get off work at 5:00, and I'll come by after that."
- "I'll be there tonight between 7:00 and 9:00."
- "Maybe I'll come take a look at it later."
- "If I get off work early, I'll stop in."
---
### Option 3:
If the call does not fall under Option 1 or Option 2, select Option 3.
### Classification Logic Summary:
1. If the caller specifies an exact time or an estimated arrival time within one hour -> **Option 1 (Specific time or within 1 hour).**
2. If the caller provides a time range longer than one hour or speaks vaguely about their visit -> **Option 2 (Loose time or exceeding 1 hour).**
3. Otherwise -> **Option 3 (Neither Option 1 nor Option 2 applies).**
Based on this classification, return the appropriate option from:
- **Option 1**: Specific time or within 1 hour.
- **Option 2**: Loose time or exceeding 1 hour.
- **Option 3**: Neither Option 1 nor Option 2 applies.
"""
# Home route to serve the index.html file from the root directory
@app.route('/')
def home():
    """Serve the single-page front end.

    Returns:
        The response produced by ``send_file('index.html')``; the path is
        relative, exactly as in the original call.
    """
    # Without the route decorator this view is never registered with the app,
    # so a request for "/" would get a 404.
    return send_file('index.html')
# API route to handle file upload, transcription, and model interaction
# NOTE(review): no route registration was visible for this view, so the URL
# below is an assumption -- confirm it matches the endpoint the front end posts to.
@app.route("/api/generate", methods=["POST"])
def generate_api():
    """Transcribe an uploaded call recording and classify it with Gemini.

    Expects a multipart POST with the recording in the ``audio_file`` field.

    Returns:
        A ``(json, status)`` pair:
        * 200 -- ``{"transcription": str, "selected_option": int | None}``;
          ``selected_option`` is ``None`` when the model reply names no
          recognised option label.
        * 400 -- missing, unnamed or unusable upload.
        * 405 -- called for a method other than POST.
        * 502 -- the transcription service reported a failure.
        * 500 -- any other unexpected error (message in ``"error"``).
    """
    # Guard clause: previously a non-POST call fell through and returned None.
    if request.method != "POST":
        return jsonify({"error": "Method not allowed"}), 405

    try:
        # Check if an audio file was uploaded
        if 'audio_file' not in request.files:
            return jsonify({"error": "No audio file provided"}), 400
        audio_file = request.files['audio_file']
        if audio_file.filename == '':
            return jsonify({"error": "No selected file"}), 400

        # The filename is client-controlled: keep only its final component so
        # values such as "../../app.py" cannot escape the upload directory.
        safe_name = os.path.basename(audio_file.filename.replace("\\", "/"))
        if safe_name in ("", ".", ".."):
            return jsonify({"error": "Invalid file name"}), 400

        # Save the uploaded file to the server
        file_path = os.path.join(app.config['UPLOAD_FOLDER'], safe_name)
        audio_file.save(file_path)

        # Transcribe the audio using AssemblyAI
        transcript = aai.Transcriber().transcribe(file_path)
        # A failed transcription carries no text; surface the service error
        # instead of sending "None" to the model.
        if transcript.status == aai.TranscriptStatus.error:
            return jsonify({"error": transcript.error}), 502

        # Send transcription and instructions to Gemini model
        model = ChatGoogleGenerativeAI(model="gemini-1.5-flash-002")
        message = HumanMessage(
            content=f"{GEMINI_INSTRUCTIONS}\n\nCall Transcription: {transcript.text}"
        )
        # The reply is streamed; join the chunks and lowercase the result so
        # the label search below is case-insensitive.
        result_text = ''.join(
            chunk.content for chunk in model.stream([message])
        ).lower()

        # Interpret the model's response to select the correct option.
        # Labels are tried in this order and the first one found wins.
        # 'option 4' is kept from the original mapping for compatibility.
        options = {'option 1': 1, 'option 2': 2, 'option 3': 3, 'option 4': 4}
        # Default to None so an unrecognised reply no longer raises
        # UnboundLocalError when the response is built.
        selected_option = next(
            (number for label, number in options.items() if label in result_text),
            None,
        )

        # Return the transcription and selected option
        return jsonify({
            "transcription": transcript.text,
            "selected_option": selected_option
        }), 200
    except Exception as e:
        # Top-level boundary for the request: log the traceback and report the
        # failure with an error status (the original implicitly returned 200).
        app.logger.exception("generate_api failed")
        return jsonify({"error": str(e)}), 500
# Route to serve static files
@app.route('/<path:path>')
def serve_static(path):
    """Serve a front-end asset from the application directory.

    Args:
        path: URL path of the requested file, relative to the app directory.

    Returns:
        The file response from ``send_from_directory('.', path)``, or a JSON
        404 for paths that must never be exposed.
    """
    # The served directory is the application root, which also holds the .env
    # file, the Python source and the uploaded recordings. Refuse those so a
    # plain GET cannot read API keys or other callers' audio.
    segments = [part for part in path.replace("\\", "/").split("/") if part]
    extension = os.path.splitext(path)[1].lower()
    upload_dir = app.config.get('UPLOAD_FOLDER')
    is_hidden = any(part.startswith(".") for part in segments)
    is_source = extension in (".py", ".pyc")
    is_upload = bool(segments) and upload_dir is not None and segments[0] == upload_dir
    if is_hidden or is_source or is_upload:
        return jsonify({"error": "Not found"}), 404

    return send_from_directory('.', path)
# Run the Flask application
if __name__ == '__main__':
    # Debug mode enables the Werkzeug interactive debugger, which lets anyone
    # who can reach the server execute arbitrary Python. Keep it off unless
    # explicitly requested, e.g. FLASK_DEBUG=1 for local development.
    debug_enabled = os.getenv("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(debug=debug_enabled)