import os
import re
import json
import logging
import asyncio
import hashlib
import mimetypes
from datetime import datetime
from typing import Dict, Any, Optional, List, Tuple

import httpx
import requests
import PIL.Image
import google.generativeai as genai
from dotenv import load_dotenv
from openai import AsyncOpenAI

from app.utils.load_env import ACCESS_TOKEN, WHATSAPP_API_URL, GEMINI_API, MEDIA_UPLOAD_URL
from app.utils.system_prompt import system_prompt
from app.services.search_engine import google_search
# from app.search.rag_pipeline import extract_keywords_async
from vidavox.core import (
    BaseResultFormatter,
    SearchResult,
)

# Load environment variables
load_dotenv()
# Get the base URL from ngrok
def get_ngrok_url() -> str:
    """Fetch the public URL of the first ngrok tunnel."""
    try:
        response = requests.get("http://localhost:4040/api/tunnels")
        response.raise_for_status()  # Raise an error for bad status codes.
        tunnels = response.json().get("tunnels", [])
        if tunnels:
            # Prefer the HTTPS tunnel if available.
            for tunnel in tunnels:
                if tunnel.get("proto") == "https":
                    return tunnel.get("public_url")
            # Fallback: return the first tunnel's URL.
            return tunnels[0].get("public_url")
    except Exception as e:
        print("Error fetching ngrok URL:", e)
    # Fallback in case ngrok isn't running or exposes no tunnels.
    return "http://localhost:8005"


base_url = get_ngrok_url()  # Automatically retrieve your public ngrok URL
print("Base URL:", base_url)
# Build public image links from stored image paths
def get_image_links(image_paths: List[str], base_url: str) -> List[str]:
    links = []
    for path in image_paths:
        # Remove the surrounding brackets and any extra whitespace
        cleaned = path.strip("[]").strip()
        # Split by comma to get the individual image paths
        parts = [part.strip() for part in cleaned.split(",") if part.strip()]
        for part in parts:
            # Assuming the part starts with "images/", extract the filename
            if part.startswith("images/"):
                filename = part.split("/", 1)[1]
                links.append(f"{base_url}/images/{filename}")
            else:
                links.append(f"{base_url}/{part}")  # Fallback if the format is unexpected
    return links
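

# Illustrative example (file names and URL are hypothetical) of the bracketed,
# comma-separated format this helper expects:
#     get_image_links(["[images/pump_diagram.png, images/valve.png]"],
#                     "https://abc123.ngrok.io")
#     -> ["https://abc123.ngrok.io/images/pump_diagram.png",
#         "https://abc123.ngrok.io/images/valve.png"]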
# Define function specifications for Gemini function calling
function_declarations = [
    {
        "name": "google_search",
        "description": "Perform a Google search and retrieve search results",
        "parameters": {
            "type": "object",
            "properties": {
                "query": {
                    "type": "string",
                    "description": "The search query to perform"
                },
                "num_results": {
                    "type": "string",
                    "description": "Number of search results to retrieve (1-10)",
                    "default": "3"
                }
            },
            "required": ["query"]
        }
    }
]
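
# Note: these declarations are not wired into the active model below. If function
# calling is enabled later, one assumed (unverified here) way to attach them is via
# the `tools` argument of GenerativeModel, e.g.:
#     model = genai.GenerativeModel(
#         "gemini-1.5-flash",
#         system_instruction=system_prompt,
#         tools=[{"function_declarations": function_declarations}],
#     )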


class CustomResultFormatter(BaseResultFormatter):
    def format(self, result: SearchResult) -> Dict[str, Any]:
        # Customize the result format as needed
        return {
            "doc_id": result.doc_id,
            "page_content": result.text,
            "image": result.meta_data['images_path'],
            "relevance": result.score,
        }

genai.configure(api_key=GEMINI_API)
# client = AsyncOpenAI(api_key=OPENAI_API)

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Validate environment variables
if not WHATSAPP_API_URL or not ACCESS_TOKEN:
    logger.warning("Environment variables for WHATSAPP_API_URL or ACCESS_TOKEN are not set!")

# Path for the media upload cache file
CACHE_FILE = 'upload_cache.json'

# Load the cache if it exists, otherwise initialize an empty dict
if os.path.exists(CACHE_FILE):
    with open(CACHE_FILE, 'r') as f:
        upload_cache = json.load(f)
else:
    upload_cache = {}


def save_cache():
    with open(CACHE_FILE, 'w') as f:
        json.dump(upload_cache, f)


def compute_file_hash(file_path, block_size=65536):
    """Compute the SHA-256 hash of a file to uniquely identify its content."""
    hasher = hashlib.sha256()
    with open(file_path, 'rb') as f:
        for block in iter(lambda: f.read(block_size), b''):
            hasher.update(block)
    return hasher.hexdigest()


# Helper function to upload an image to the WhatsApp media endpoint
async def upload_image(file_path):
    logger.info(f"Uploading image: {file_path}")

    # Ensure the file exists
    if not os.path.exists(file_path):
        raise Exception(f"File not found: {file_path}")

    # Compute a hash for the file to check for previous uploads
    file_hash = compute_file_hash(file_path)
    if file_hash in upload_cache:
        logger.info(f"File {file_path} already uploaded. Returning cached media ID.")
        return upload_cache[file_hash]

    # Get the MIME type of the file
    mime_type, _ = mimetypes.guess_type(file_path)
    if not mime_type:
        raise Exception(f"Could not determine the MIME type for file: {file_path}")

    headers = {
        'Authorization': f'Bearer {ACCESS_TOKEN}'
    }

    # Open the file and prepare the payload for upload
    with open(file_path, 'rb') as media_file:
        files = {
            'file': (os.path.basename(file_path), media_file, mime_type)
        }
        data = {
            'messaging_product': 'whatsapp'
        }
        # Note: requests is synchronous, so this call blocks the event loop for
        # the duration of the upload.
        response = requests.post(MEDIA_UPLOAD_URL, headers=headers, files=files, data=data)

    if response.status_code == 200:
        logger.info(f"Upload successful: {response.text}")
        media_id = response.json()['id']
        # Cache the result so future calls can reuse the same media ID
        upload_cache[file_hash] = media_id
        save_cache()
        return media_id
    else:
        logger.error(f"Upload failed: {response.text}")
        raise Exception(f'Failed to upload media: {response.status_code}, {response.text}')
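

# Illustrative sketch only (not used by the active send path, which sends images by
# public link): upload once to obtain a reusable media ID, then reference that ID in
# an image message payload. The file path and recipient below are hypothetical.
#     media_id = await upload_image("images/valve.png")
#     payload = {"messaging_product": "whatsapp", "to": recipient,
#                "type": "image", "image": {"id": media_id}}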


# Helper function to send a reply (text plus optional images) via WhatsApp
async def send_reply(to: str, body: str, whatsapp_token: str, whatsapp_url: str, image: Optional[List[str]] = None) -> Dict[str, Any]:
    headers = {
        "Authorization": f"Bearer {whatsapp_token}",
        "Content-Type": "application/json"
    }
    text_data = {
        "messaging_product": "whatsapp",
        "to": to,
        "type": "text",
        "text": {
            "body": body
        }
    }
    responses = {}  # To store the responses

    async with httpx.AsyncClient() as client:
        # Send the text message first
        text_response = await client.post(whatsapp_url, json=text_data, headers=headers)
        if text_response.status_code != 200:
            error_detail = text_response.json()
            logger.error(f"Failed to send text reply: {error_detail}")
            raise Exception(f"Failed to send text reply with status code {text_response.status_code}: {error_detail}")
        responses["text"] = text_response.json()

        # Send any referenced images as separate messages
        image_responses: List[Dict[str, Any]] = []
        if image:
            # Get the list of full image URLs using the helper above.
            links = get_image_links(image, base_url)
            for link in links:
                image_payload = {
                    "messaging_product": "whatsapp",
                    "recipient_type": "individual",
                    "to": to,
                    "type": "image",
                    "image": {
                        "link": link,
                        "caption": ""  # No caption for now; adjust if needed.
                    }
                }
                img_response = await client.post(whatsapp_url, json=image_payload, headers=headers)
                if img_response.status_code != 200:
                    error_detail = img_response.json()
                    logger.error(f"Failed to send image: {error_detail}")
                    raise Exception(f"Failed to send image with status code {img_response.status_code}: {error_detail}")
                image_responses.append(img_response.json())
        responses["images"] = image_responses

    return responses


# Helper function to generate a simple rule-based reply from message content
async def generate_reply(sender: str, content: str, timestamp: int) -> str:
    try:
        received_time = datetime.fromtimestamp(int(timestamp) / 1000)  # Assuming the timestamp is in milliseconds
        if "hello" in content.lower():
            return f"Hi {sender}, how can I assist you today?"
        elif "test" in content.lower():
            return f"Hi {sender}, this is a reply to your test message."
        elif received_time.hour < 12:
            return f"Good morning, {sender}! How can I help you?"
        else:
            return f"Hello {sender}, I hope you're having a great day!"
    except Exception as e:
        logger.error(f"Error generating reply: {str(e)}", exc_info=True)
        return f"Sorry {sender}, I couldn't process your message. Please try again."


async def process_message_with_llm(
    sender_id: str,
    content: str,
    history: List[Dict[str, str]],
    rag_system: Any,
    whatsapp_token: str,
    whatsapp_url: str,
    agentMemory: Any = None,
    memory: Any = None,
    image_file_path: Optional[str] = None,
    doc_path: Optional[str] = None,
    video_file_path: Optional[str] = None,
) -> str:
    """Generate a reply with Gemini (plus RAG context) and send it back over WhatsApp."""
    try:
        logger.info(f"Processing message for sender: {sender_id}")
        generated_reply, image_path = await generate_response_from_gemini(
            sender=sender_id,
            content=content,
            history=history,
            rag_system=rag_system,
            image_file_path=image_file_path,
            doc_path=doc_path,
            video_file_path=video_file_path,
            agentMemory=agentMemory,
            memory=memory
        )
        logger.info(f"Generated reply: {generated_reply}, extracted image path: {image_path}")
        response = await send_reply(sender_id, generated_reply, whatsapp_token, whatsapp_url, image_path)
        return generated_reply
    except Exception as e:
        logger.error(f"Error in process_message_with_llm: {str(e)}", exc_info=True)
        return "Sorry, I couldn't generate a response at this time."


import markdown
from bs4 import BeautifulSoup


def format_response_text(response_text: str) -> str:
    """
    Convert markdown-formatted text to plain text with proper newlines.
    This ensures bullet points, paragraphs, and other elements are formatted
    for display in WhatsApp.
    """
    # Convert markdown to HTML
    html = markdown.markdown(response_text)
    # Parse the HTML and extract text using newline as the separator
    soup = BeautifulSoup(html, "html.parser")
    formatted_text = soup.get_text(separator="\n")
    return formatted_text
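

# Illustrative example (hypothetical input): markdown such as
#     "*Steps*\n\n- check the valve\n- restart the pump"
# is converted to HTML and then flattened to plain, newline-separated text, so
# WhatsApp shows readable lines instead of raw markdown syntax.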


def process_llm_response(llm_output):
    # If it's a string, attempt to extract JSON from markdown code fences.
    if isinstance(llm_output, str):
        pattern = r"```json\s*(\{.*\})\s*```"
        match = re.search(pattern, llm_output, re.DOTALL)
        if match:
            json_str = match.group(1)
        else:
            json_str = llm_output.strip()
        try:
            parsed = json.loads(json_str)
            if isinstance(parsed, dict) and "response" in parsed:
                response_text = parsed.get("response", "")
                # Optionally format the response text with format_response_text()
                # formatted_response = format_response_text(response_text)
                references = parsed.get("references", [])
                if isinstance(references, list):
                    image_paths = [ref.get("image") for ref in references
                                   if ref.get("image") and ref.get("image") != "nan"]
                else:
                    image_paths = []
                return response_text, image_paths
            else:
                # Fallback if the JSON doesn't have the expected structure.
                return llm_output, []
        except json.JSONDecodeError:
            # Fallback: if JSON parsing fails, treat the output as plain text.
            return format_response_text(llm_output), []
    # If not a string, return something sensible.
    return str(llm_output), []
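

# Illustrative example (hypothetical model output) of the JSON contract this parser
# expects: a "response" string plus a list of "references", each optionally carrying
# an "image" path.
#     sample_output = '''```json
#     {"response": "Check the pressure relief valve.",
#      "references": [{"doc_id": "manual_01", "image": "images/valve.png"},
#                     {"doc_id": "manual_02", "image": "nan"}]}
#     ```'''
#     text, images = process_llm_response(sample_output)
#     # text   -> "Check the pressure relief valve."
#     # images -> ["images/valve.png"]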


async def generate_response_from_gemini(
    sender: str,
    content: str,
    history: List[Dict[str, str]],
    rag_system: Any = None,
    agentMemory: Any = None,
    memory: Any = None,
    image_file_path: Optional[str] = None,
    doc_path: Optional[str] = None,
    video_file_path: Optional[str] = None,
) -> Tuple[str, List[str]]:
    try:
        logger.info(f"Generating response for sender: {sender}")

        # Initialize the model
        # model = genai.GenerativeModel("gemini-1.5-pro-002", system_instruction=system_prompt)
        model = genai.GenerativeModel("gemini-1.5-flash", system_instruction=system_prompt)
        # model = genai.GenerativeModel("gemini-exp-1206", system_instruction=system_prompt)

        # Start chat with history
        chat = model.start_chat(history=history)

        if content:
            if rag_system:
                # keywords = extract_keywords_async(content)
                # logger.info(f"Extracted Keywords: {keywords}")

                # Implement RAG: retrieve relevant documents
                retrieved_docs = rag_system.retrieve(query_text=content, result_formatter=CustomResultFormatter())
                print(f"retrieved docs: {retrieved_docs}")
                if retrieved_docs:
                    logger.info(f"Retrieved {len(retrieved_docs)} documents for context.")
                    # Format the retrieved documents as a context string
                    context = "\n\n".join(
                        [f"Source:{doc['doc_id']}\nContent: {doc['page_content']}\nImage: {doc['image']}"
                         for doc in retrieved_docs]
                    )
                    # Append the context to the history as an extra user turn
                    history.append({"role": "user", "parts": f"Relevant documents:\n{context}"})
                    # Reinitialize the chat with the updated history
                    chat = model.start_chat(history=history)

        # Process image
        if image_file_path:
            logger.info(f"Processing image at {image_file_path}")
            image_data = PIL.Image.open(image_file_path)
            response = await chat.send_message_async(image_data)
            return response.text, []

        # Process document
        if doc_path:
            logger.info(f"Processing document at {doc_path}")
            doc_data = genai.upload_file(doc_path)
            response = await chat.send_message_async(doc_data)
            return response.text, []

        # Process video (if supported)
        if video_file_path:
            logger.info(f"Processing video at {video_file_path}")
            video_data = genai.upload_file(video_file_path)
            response = await chat.send_message_async(video_data)
            return response.text, []

        # Send the user's message
        response = await chat.send_message_async(content)
        print(f"text: {response.text}")
        response_text, image_paths = process_llm_response(response.text)
        # response = await handle_function_call(response)
        return response_text, image_paths
    except Exception as e:
        logger.error("Error in generate_response_from_gemini:", exc_info=True)
        return "Sorry, I couldn't generate a response at this time.", []


async def handle_function_call(chat):
    """
    Handle function calls from the Gemini API.

    Args:
        chat (ChatSession): The current chat session.

    Returns:
        The response after resolving any function calls.
    """
    # Continue the conversation and handle any function calls
    response = await chat.send_message_async(chat.history[-1])
    while True:
        # Check if there is a function call to handle
        if response.candidates[0].content.parts[0].function_call:
            function_call = response.candidates[0].content.parts[0].function_call
            function_name = function_call.name
            # function_call.args is a mapping-like object rather than a JSON string
            function_args = dict(function_call.args)

            # Dispatch to the appropriate function
            if function_name == "google_search":
                # Handle the async search call
                result = await google_search(
                    query=function_args['query'],
                    num_results=function_args.get('num_results', '3')
                )
                # Send the function result back to continue the conversation
                # (dict form assumed here for the function response part)
                response = await chat.send_message_async(
                    {
                        "function_response": {
                            "name": function_name,
                            "response": result
                        }
                    }
                )
            else:
                # Unknown function name: stop rather than loop forever
                return response
        else:
            # No more function calls, return the final response
            return response


# Process message with retry logic
# async def process_message_with_retry(
#     sender_id: str,
#     content: str,
#     history: List[str],
#     timestamp: Optional[int] = None,
#     media: Optional[Dict[str, Any]] = None,
#     image_file_path: Optional[str] = None,
#     doc_path: Optional[str] = None,
# ) -> Dict[str, Any]:
#     """Process message with retry logic"""
#     retries = 1
#     delay = 0.1  # Initial delay in seconds
#     # for attempt in range(retries):
#     try:
#         logger.info(f"Sending message to the Gemini model...")
#         generated_reply = await generate_response_from_gemini(sender=sender_id, content=content, history=history, timestamp=timestamp, image_file_path=image_file_path, media=media, doc_path=doc_path)
#         logger.info(f"Reply generated: {generated_reply}")
#         response = await send_reply(sender_id, generated_reply)
#         return generated_reply
#         return {"status": "success", "reply": generated_reply, "response": response}
#     except Exception as e:
#         logger.error(f"Error generating reply: {str(e)}", exc_info=True)
#         return {"status": "error", "reply": "Sorry, I couldn't generate a response at this time."}
#         logger.error(f"Attempt {attempt + 1} failed: {str(e)}", exc_info=True)
#         if attempt < retries - 1:
#             await asyncio.sleep(delay)
#             delay *= 2  # Exponential backoff
#         else:
#             raise Exception(f"All {retries} attempts failed.") from e

# Example usage
# asyncio.run(process_message_with_retry("1234567890", "hello", 1700424056000))

# async def generate_response_from_gemini(sender: str, content: str, timestamp: str, history: List[Dict[str, str]], media: Optional[Dict[str, Any]] = None, image_file_path: Optional[str] = None, doc_path: Optional[str] = None) -> str:
#     try:
#         print(f"Sender: {sender}")
#         print(f"Content: {content}")
#         print(f"Timestamp: {timestamp}")
#         print(f"History: {history}")
#         print(f"Media: {media}")
#         # Initialize the model
#         model = genai.GenerativeModel("gemini-1.5-pro-002")
#         # Define the chat history
#         chat = model.start_chat(
#             history=history
#         )
#         logger.info(f"file_path: {image_file_path}")
#         if image_file_path:  # Should be bytes or a file-like object
#             prompt = "Describe the following image:"
#             image_data = PIL.Image.open(image_file_path)
#             print("Sending image to the Gemini model...")
#             response = await chat.send_message_async(image_data)
#             print(f"Model response: {response.text}")
#             return response.text
#         if doc_path:
#             doc_data = genai.upload_file(doc_path)
#             print("Sending document to the Gemini model...")
#             response = await chat.send_message_async(doc_data)
#             print(f"Model response: {response.text}")
#             return response.text
#         # Send the user's message
#         print("Sending message to the Gemini model...")
#         response = await chat.send_message_async(content)
#         print(f"Model response: {response.text}")
#         return response.text
#     except Exception as e:
#         print("Error generating reply from Gemini:", e)
#         return "Sorry, I couldn't generate a response at this time."

# async def generate_response_from_chatgpt(sender: str, content: str, timestamp: str, history: str) -> str:
#     """
#     Generate a reply using OpenAI's ChatGPT API.
#     """
#     try:
#         # # Initialize chat history if not provided
#         # chat_history = chat_history or []
#         # # Append the current user message to the chat history
#         # chat_history.append({"role": "user", "content": f"From {sender} at {timestamp}: {content}"})
#         messages = [
#             {"role": "system", "content": "You're an investor, a serial founder, and you've sold many startups. You understand nothing but business."},
#             {"role": "system", "content": f"Message History: {history}"},
#             {"role": "user", "content": f"From {sender} at {timestamp}: {content}"}
#         ]
#         print(f"Messages: {messages}")
#         response = await client.chat.completions.create(
#             model="gpt-3.5-turbo",
#             messages=messages,
#             max_tokens=200,
#             temperature=0.5
#         )
#         chatgpt_response = response.choices[0].message.content.strip()
#         # Append the assistant's response to the chat history
#         # chat_history.append({"role": "assistant", "content": chatgpt_response})
#         return chatgpt_response
#     except Exception as e:
#         print("Error generating reply:", e)
#         return "Sorry, I couldn't generate a response at this time."

# async def generate_response_from_chatgpt(
#     sender: str,
#     content: str,
#     timestamp: str,
#     image: Optional[bytes] = None,
#     file: Optional[bytes] = None,
#     file_name: Optional[str] = None,
#     chat_history: Optional[List[Dict[str, str]]] = None,
# ) -> Dict[str, Any]:
#     """
#     Generate a reply using OpenAI's GPT-4 API, including support for images, files, and maintaining chat history.
#     """
#     try:
#         # Initialize chat history if not provided
#         chat_history = chat_history or []
#         # Append the current user message to the chat history
#         chat_history.append({"role": "user", "content": f"From {sender} at {timestamp}: {content}"})
#         # Prepare files for the request
#         files = []
#         if image:
#             files.append({"name": "image.png", "type": "image/png", "content": image})
#         if file:
#             files.append({"name": file_name or "file.txt", "type": "application/octet-stream", "content": file})
#         logger.debug(f"Chat History Before Response: {chat_history}")
#         # Send the request to the GPT-4 API
#         response = await client.chat.completions.create(
#             model="gpt-4-vision",  # Ensure this is the correct model for multimodal support
#             messages=chat_history,
#             files=files if files else None,  # Include files if present
#             max_tokens=200,
#             temperature=0.5,
#         )
#         # Parse the assistant's response
#         chatgpt_response = response.choices[0].message.content.strip()
#         # Append the assistant's response to the chat history
#         chat_history.append({"role": "assistant", "content": chatgpt_response})
#         logger.debug(f"Chat History After Response: {chat_history}")
#         # Return both the assistant's response and the updated chat history
#         return {"response": chatgpt_response, "chat_history": chat_history}
#     except Exception as e:
#         logger.error("Error generating reply", exc_info=True)
#         return {"response": "Sorry, I couldn't generate a response at this time.", "chat_history": chat_history}