Spaces:

Hammad712
/

recitation-compare

Sleeping

App Files Files Community

recitation-compare / main.py

Hammad712

Update main.py

8bb5ed1 verified 4 months ago

raw

history blame

2.35 kB

	import os

	import uvicorn
	from fastapi import FastAPI, File, HTTPException, UploadFile
	from google import genai
	from google.genai import types

	app = FastAPI()

	# The GenAI credentials come from the process environment; refuse to start
	# when the variable is missing or empty.
	api_key = os.environ.get("GENAI_API_KEY")
	if not api_key:
	    raise EnvironmentError("GENAI_API_KEY environment variable not set")

	# Module-level GenAI client, created once at import time.
	client = genai.Client(api_key=api_key)

	# Landing route: answers GET / with a static JSON document that greets the
	# caller and describes how to use the /compare-audio endpoint.
	@app.get("/")
	async def root():
	    # Usage details are assembled separately to keep the return flat.
	    usage = {
	        "endpoint": "/compare-audio",
	        "description": "POST two audio files (user recitation and professional qarri) for similarity analysis.",
	        "instructions": "Send audio files as form-data with keys 'audio1' and 'audio2'.",
	    }
	    return {"message": "Welcome to the Audio Similarity API!", "usage": usage}

	@app.post("/compare-audio")
	async def compare_audio(
	    audio1: UploadFile = File(...),
	    audio2: UploadFile = File(...)
	):
	    """Compare a user's recitation against a professional recitation.

	    Both uploads are sent, together with a fixed instruction prompt, to the
	    ``gemini-2.0-flash`` model and the model's text reply is returned.

	    Args:
	        audio1: The user's recitation (form-data key ``audio1``).
	        audio2: The professional qarri recitation (form-data key ``audio2``).

	    Returns:
	        A dict of the form ``{"result": <model reply text>}``.

	    Raises:
	        HTTPException: 400 if either upload is empty or has no content type.
	    """
	    # Read and validate each upload up front so that bad input is reported
	    # as a client error instead of failing inside the model call.
	    parts = []
	    for field, upload in (("audio1", audio1), ("audio2", audio2)):
	        data = await upload.read()
	        if not data:
	            raise HTTPException(
	                status_code=400,
	                detail=f"Uploaded file '{field}' is empty.",
	            )
	        # The MIME type is passed through to the model as-is, so it must be
	        # present.
	        if not upload.content_type:
	            raise HTTPException(
	                status_code=400,
	                detail=f"Uploaded file '{field}' has no content type.",
	            )
	        parts.append(
	            types.Part.from_bytes(data=data, mime_type=upload.content_type)
	        )

	    # Create a refined prompt that clearly identifies the audio sources.
	    prompt = (
	"""Please analyze and compare the two provided audio clips.
	The first audio is the user's recitation, and the second audio is the professional qarri recitation.
	Evaluate their similarity on a scale from 0 to 1, where:
	- 1 indicates the user's recitation contains no mistakes compared to the professional version,
	- 0 indicates there are significant mistakes.
	Provide your response with:
	1. A numerical similarity score on the first line.
	2. A single sentence that indicates whether the user's recitation is similar, moderately similar, or dissimilar to the professional qarri."""
	    )

	    # Use the SDK's async client: the synchronous call would block the event
	    # loop (and every other request) for the duration of the model request.
	    response = await client.aio.models.generate_content(
	        model='gemini-2.0-flash',
	        contents=[prompt, *parts],
	    )

	    # Return the model's response.
	    return {"result": response.text}

	# When executed directly as a script, serve the app with uvicorn on all
	# interfaces, port 8000.
	if __name__ == "__main__":
	    uvicorn.run(app=app, port=8000, host="0.0.0.0")