Chief-Joy-Officer / interfaces /whatsapp /whatsapp_response.py
ric9176's picture
Add interface and update pyproject.toml for new deeps
5cb5f85
import logging
import os
from io import BytesIO
from typing import Dict
import httpx
from fastapi import APIRouter, Request, Response
from langchain_core.messages import HumanMessage
from langgraph.checkpoint.sqlite.aio import AsyncSqliteSaver
from agent import create_agent_graph
from agent.utils.state import AgentState
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Global module instances, created once at import time.
# NOTE(review): SpeechToText, TextToSpeech and ImageToText are not imported in
# the visible part of this file — confirm where they are defined; if they are
# not brought into scope, importing this module raises NameError.
speech_to_text = SpeechToText()
text_to_speech = TextToSpeech()
image_to_text = ImageToText()
# Router for WhatsApp responses
whatsapp_router = APIRouter()
# WhatsApp API credentials, read from the environment once at import time.
# Either value is None when the corresponding variable is unset.
WHATSAPP_TOKEN = os.getenv("WHATSAPP_TOKEN")
WHATSAPP_PHONE_NUMBER_ID = os.getenv("WHATSAPP_PHONE_NUMBER_ID")
@whatsapp_router.api_route("/whatsapp_response", methods=["GET", "POST"])
async def whatsapp_handler(request: Request) -> Response:
    """Handle incoming messages and status updates from the WhatsApp Cloud API.

    GET requests are treated as the webhook verification handshake: the
    ``hub.verify_token`` query parameter is compared with the
    ``WHATSAPP_VERIFY_TOKEN`` environment variable and, on a match, the
    ``hub.challenge`` value is echoed back.

    Any other request is treated as a webhook event. Text messages are run
    through the agent graph (using the sender's number as the thread id) and
    the last message of the resulting state is sent back to the sender.

    Args:
        request: The incoming FastAPI request.

    Returns:
        A ``Response`` whose status code is 200 on success, 403 on a failed
        verification, 400 for an unrecognised event and 500 when sending or
        processing fails.
    """
    if request.method == "GET":
        params = request.query_params
        expected_token = os.getenv("WHATSAPP_VERIFY_TOKEN")
        supplied_token = params.get("hub.verify_token")
        # Require a configured, non-empty token. Without this guard an unset
        # env var and a missing query parameter compare as None == None and
        # the verification would succeed without any secret.
        if expected_token and supplied_token == expected_token:
            return Response(content=params.get("hub.challenge"), status_code=200)
        return Response(content="Verification token mismatch", status_code=403)

    try:
        data = await request.json()
        change_value = data["entry"][0]["changes"][0]["value"]

        if "statuses" in change_value and "messages" not in change_value:
            return Response(content="Status update received", status_code=200)
        if "messages" not in change_value:
            return Response(content="Unknown event type", status_code=400)

        message = change_value["messages"][0]
        from_number = message["from"]
        # The sender's number doubles as the conversation/thread identifier.
        session_id = from_number

        # Get user message (text only for now)
        if message["type"] != "text":
            await send_response(
                from_number,
                "Sorry, I can only process text messages at the moment.",
            )
            return Response(content="Non-text message received", status_code=200)

        content = message["text"]["body"]

        # Process message through the graph agent
        async with AsyncSqliteSaver.from_conn_string(
            "data/short_term.db"
        ) as short_term_memory:
            graph = await create_agent_graph(short_term_memory)
            current_state = AgentState(
                messages=[HumanMessage(content=content)],
                context=[],
            )
            # Get the response from the graph
            output_state = await graph.ainvoke(
                current_state,
                {"configurable": {"thread_id": session_id}},
            )
            response_message = output_state["messages"][-1].content

        success = await send_response(from_number, response_message)
        if not success:
            return Response(content="Failed to send message", status_code=500)
        return Response(content="Message processed", status_code=200)
    except Exception as e:
        # Top-level boundary: log with traceback and report a generic failure.
        logger.exception("Error processing message: %s", e)
        return Response(content="Internal server error", status_code=500)
async def download_media(media_id: str) -> bytes:
    """Fetch the raw bytes of a WhatsApp media object.

    Two authenticated GET requests are made with the same client: the first
    retrieves the media metadata for ``media_id``, the second downloads the
    file from the ``url`` field of that metadata.

    Args:
        media_id: Identifier of the media object to download.

    Returns:
        The body of the download response as bytes.

    Raises:
        httpx.HTTPStatusError: If either request returns an error status.
    """
    auth = {"Authorization": f"Bearer {WHATSAPP_TOKEN}"}
    async with httpx.AsyncClient() as http:
        # Step 1: look up the metadata to learn where the file lives.
        lookup = await http.get(
            f"https://graph.facebook.com/v21.0/{media_id}", headers=auth
        )
        lookup.raise_for_status()
        file_url = lookup.json().get("url")

        # Step 2: fetch the file itself; the same bearer token is sent.
        payload = await http.get(file_url, headers=auth)
        payload.raise_for_status()
        return payload.content
async def process_audio_message(message: Dict) -> str:
    """Download and transcribe an audio message.

    Args:
        message: A message dict whose ``["audio"]["id"]`` entry holds the
            media id of the audio to transcribe.

    Returns:
        The result of ``speech_to_text.transcribe`` for the downloaded bytes.

    Raises:
        KeyError: If ``message`` has no ``["audio"]["id"]`` entry.
        httpx.HTTPStatusError: If a download request returns an error status.
    """
    # Reuse the shared two-step download (metadata lookup, then file fetch)
    # instead of duplicating it here with a second HTTP client.
    audio_data = await download_media(message["audio"]["id"])
    return await speech_to_text.transcribe(audio_data)
async def send_response(from_number: str, response_text: str) -> bool:
    """Send a text response to a user via the WhatsApp API.

    Args:
        from_number: The recipient's number (the sender of the inbound
            message being answered).
        response_text: The text body to send.

    Returns:
        True when the API answered with HTTP 200; False when it answered
        with any other status or the request could not be completed.
    """
    headers = {
        "Authorization": f"Bearer {WHATSAPP_TOKEN}",
        "Content-Type": "application/json",
    }
    json_data = {
        "messaging_product": "whatsapp",
        "to": from_number,
        "type": "text",
        "text": {"body": response_text},
    }

    try:
        async with httpx.AsyncClient() as client:
            response = await client.post(
                f"https://graph.facebook.com/v21.0/{WHATSAPP_PHONE_NUMBER_ID}/messages",
                headers=headers,
                json=json_data,
            )
    except httpx.HTTPError as exc:
        # Honour the bool contract: a transport failure is reported as
        # False instead of escaping as an exception.
        logger.error("Failed to reach WhatsApp API: %s", exc)
        return False

    if response.status_code != 200:
        # Surface the API's own error payload; otherwise failures are silent.
        logger.error(
            "WhatsApp API rejected message: status=%s body=%s",
            response.status_code,
            response.text,
        )
        return False
    return True
async def upload_media(media_content: BytesIO, mime_type: str) -> str:
    """Upload media to WhatsApp servers and return its media id.

    The content is posted as a multipart file named ``response.mp3`` with
    the given MIME type.

    Args:
        media_content: Buffer holding the media to upload.
        mime_type: MIME type sent both for the file part and as the ``type``
            form field.

    Returns:
        The ``id`` value from the JSON response.

    Raises:
        Exception: If the JSON response contains no ``id``.
    """
    endpoint = f"https://graph.facebook.com/v21.0/{WHATSAPP_PHONE_NUMBER_ID}/media"

    async with httpx.AsyncClient() as http:
        upload = await http.post(
            endpoint,
            headers={"Authorization": f"Bearer {WHATSAPP_TOKEN}"},
            files={"file": ("response.mp3", media_content, mime_type)},
            data={"messaging_product": "whatsapp", "type": mime_type},
        )
        body = upload.json()

    if "id" in body:
        return body["id"]
    raise Exception("Failed to upload media")