Spaces:
Sleeping
Sleeping
import os

import uvicorn
from fastapi import FastAPI, File, HTTPException, UploadFile
from google import genai
from google.genai import types
# ASGI application object that the handlers and the uvicorn launcher use.
app = FastAPI()

# The GenAI credential comes from the environment; stop at import time when
# it is absent or empty rather than failing later on the first request.
api_key = os.environ.get("GENAI_API_KEY")
if api_key is None or api_key == "":
    raise EnvironmentError("GENAI_API_KEY environment variable not set")

# Single GenAI client shared by the request handlers.
client = genai.Client(api_key=api_key)
# NOTE(review): the handler was not attached to any route, so the app never
# served it. The "/" path is inferred from the handler name — confirm.
@app.get("/")
async def root():
    """Return a welcome payload describing how to call the comparison endpoint.

    Returns:
        A dict with a ``message`` string and a ``usage`` mapping that names
        the endpoint, describes it, and gives the form-data field names.
    """
    return {
        "message": "Welcome to the Audio Similarity API!",
        "usage": {
            "endpoint": "/compare-audio",
            "description": "POST two audio files (user recitation and professional qarri) for similarity analysis.",
            "instructions": "Send audio files as form-data with keys 'audio1' and 'audio2'.",
        },
    }
# Instruction text sent to the model ahead of the two audio parts.
_COMPARISON_PROMPT = """Please analyze and compare the two provided audio clips.
The first audio is the user's recitation, and the second audio is the professional qarri recitation.
Evaluate their similarity on a scale from 0 to 1, where:
- 1 indicates the user's recitation contains no mistakes compared to the professional version,
- 0 indicates there are significant mistakes.
Provide your response with:
1. A numerical similarity score on the first line.
2. A single sentence that indicates whether the user's recitation is similar, moderately similar, or dissimilar to the professional qarri."""


async def _read_audio_part(upload: UploadFile, field_name: str) -> types.Part:
    """Read one uploaded file and wrap its bytes as a GenAI content part.

    Args:
        upload: The uploaded file received from the multipart form.
        field_name: Form field name, used only in error messages.

    Returns:
        A ``types.Part`` built from the file's bytes and its content type.

    Raises:
        HTTPException: 400 when the upload is empty or has no content type.
    """
    payload = await upload.read()
    if not payload:
        raise HTTPException(
            status_code=400,
            detail=f"Uploaded file '{field_name}' is empty.",
        )
    # Part.from_bytes needs a MIME type; reject the request up front instead
    # of letting a missing one surface as an opaque server error.
    if not upload.content_type:
        raise HTTPException(
            status_code=400,
            detail=f"Uploaded file '{field_name}' has no content type.",
        )
    return types.Part.from_bytes(data=payload, mime_type=upload.content_type)


# Path and method are the ones advertised by the welcome payload.
@app.post("/compare-audio")
async def compare_audio(
    audio1: UploadFile = File(...),
    audio2: UploadFile = File(...),
):
    """Ask the Gemini model to compare a user recitation with a reference one.

    Args:
        audio1: The user's recitation, sent as form-data key ``audio1``.
        audio2: The professional qarri recitation, form-data key ``audio2``.

    Returns:
        A dict ``{"result": <text of the model response>}``.

    Raises:
        HTTPException: 400 when either upload is empty or lacks a content
            type.
    """
    user_part = await _read_audio_part(audio1, "audio1")
    reference_part = await _read_audio_part(audio2, "audio2")

    # Use the SDK's async client so the network round trip does not block the
    # event loop while other requests are waiting.
    response = await client.aio.models.generate_content(
        model="gemini-2.0-flash",
        contents=[_COMPARISON_PROMPT, user_part, reference_part],
    )

    return {"result": response.text}
if __name__ == "__main__":
    # Run directly as a script: serve the app on all interfaces, port 8000.
    uvicorn.run(app, port=8000, host="0.0.0.0")