Spaces:

Hammad712
/

recitation-compare

Sleeping

App Files Community

Hammad712 committed on Mar 16

Commit

0318876

verified ·

1 Parent(s): ef20d33

Update main.py

Browse files

Files changed (1) hide show

main.py +24 -57

main.py CHANGED Viewed

@@ -20,8 +20,6 @@ os.environ["NUMBA_DISABLE_JIT"] = "1"
 MODEL = None
 PROCESSOR = None
 UPLOAD_DIR = os.path.join(tempfile.gettempdir(), "quran_comparison_uploads")
-# Ensure upload directory exists
 os.makedirs(UPLOAD_DIR, exist_ok=True)
 # Response models
@@ -63,7 +61,7 @@ async def lifespan(app: FastAPI):
     initialize_model()
     yield
-# Create the FastAPI app with the lifespan handler and CORS middleware
 app = FastAPI(
     title="Quran Recitation Comparison API",
     description="API for comparing similarity between Quran recitations using Wav2Vec2 embeddings",
@@ -90,13 +88,10 @@ def load_audio(file_path, target_sr=16000, trim_silence=True, normalize=True):
     """Load and preprocess an audio file."""
     try:
         y, sr = librosa.load(file_path, sr=target_sr)
         if normalize:
             y = librosa.util.normalize(y)
         if trim_silence:
             y, _ = librosa.effects.trim(y, top_db=30)
         return y
     except Exception as e:
         raise HTTPException(status_code=400, detail=f"Error loading audio: {e}")
@@ -105,10 +100,8 @@ def load_audio(file_path, target_sr=16000, trim_silence=True, normalize=True):
 def get_deep_embedding(audio, sr=16000):
     """Extract frame-wise deep embeddings using the pretrained model."""
     global MODEL, PROCESSOR
     if MODEL is None or PROCESSOR is None:
         raise HTTPException(status_code=500, detail="Model not initialized")
     try:
         device = next(MODEL.parameters()).device
         input_values = PROCESSOR(
@@ -122,28 +115,22 @@ def get_deep_embedding(audio, sr=16000):
         hidden_states = outputs.hidden_states[-1]
         embedding_seq = hidden_states.squeeze(0).cpu().numpy()
         return embedding_seq
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Error extracting embeddings: {e}")
-# Custom DTW implementation to avoid librosa.sequence.dtw issues
 def custom_dtw(X, Y, metric='euclidean'):
     """
-    Custom implementation of DTW to avoid librosa.sequence.dtw issues.
-    Parameters:
-    X, Y : numpy.ndarray
-        The two sequences to be aligned
-    metric : str, optional
-        The distance metric to use
-    Returns:
-    D : numpy.ndarray
-        The accumulated cost matrix
-    wp : list
-        The warping path
     """
     n, m = len(X[0]), len(Y[0])
     D = np.zeros((n+1, m+1))
     D[0, :] = np.inf
@@ -157,8 +144,7 @@ def custom_dtw(X, Y, metric='euclidean'):
             elif metric == 'cosine':
                 cost = 1 - np.dot(X[:, i-1], Y[:, j-1]) / (np.linalg.norm(X[:, i-1]) * np.linalg.norm(Y[:, j-1]))
             else:
-                cost = np.sum(np.abs(X[:, i-1] - Y[:, j-1]))  # Manhattan by default
             D[i, j] = cost + min(D[i-1, j], D[i, j-1], D[i-1, j-1])
     i, j = n, m
@@ -183,9 +169,8 @@ def compute_dtw_distance(features1, features2):
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Error computing DTW distance: {e}")
-# Interpret similarity
 def interpret_similarity(norm_distance):
-    """Interpret the normalized distance value."""
     if norm_distance == 0:
         result = "The recitations are identical based on the deep embeddings."
         score = 100
@@ -204,12 +189,10 @@ def interpret_similarity(norm_distance):
     else:
         result = "The recitations are quite different."
         score = max(0, 100 - norm_distance)
     return result, score
 # Clean up temporary files
 def cleanup_temp_files(file_paths):
-    """Remove temporary files."""
     for file_path in file_paths:
         if os.path.exists(file_path):
             try:
@@ -224,63 +207,47 @@ async def compare_recitations(
     file1: UploadFile = File(...),
     file2: UploadFile = File(...)
 ):
-    """
-    Compare two Quran recitations and return similarity metrics.
-    - **file1**: First audio file
-    - **file2**: Second audio file
-    Returns:
-    - **similarity_score**: Score between 0-100 indicating similarity
-    - **interpretation**: Text interpretation of the similarity
-    """
-    if MODEL is None or PROCESSOR is None:
-        raise HTTPException(status_code=500, detail="Model not initialized")
     temp_file1 = os.path.join(UPLOAD_DIR, f"{uuid.uuid4()}.wav")
     temp_file2 = os.path.join(UPLOAD_DIR, f"{uuid.uuid4()}.wav")
     try:
         with open(temp_file1, "wb") as f:
             shutil.copyfileobj(file1.file, f)
         with open(temp_file2, "wb") as f:
             shutil.copyfileobj(file2.file, f)
         audio1 = load_audio(temp_file1)
         audio2 = load_audio(temp_file2)
         embedding1 = get_deep_embedding(audio1)
         embedding2 = get_deep_embedding(audio2)
         norm_distance = compute_dtw_distance(embedding1.T, embedding2.T)
         interpretation, similarity_score = interpret_similarity(norm_distance)
         background_tasks.add_task(cleanup_temp_files, [temp_file1, temp_file2])
-        return {
-            "similarity_score": similarity_score,
-            "interpretation": interpretation
-        }
     except Exception as e:
         background_tasks.add_task(cleanup_temp_files, [temp_file1, temp_file2])
-        raise HTTPException(status_code=500, detail=str(e))
 # Health check endpoint
 @app.get("/health")
 async def health_check():
-    """Health check endpoint."""
     if MODEL is None or PROCESSOR is None:
-        return JSONResponse(
-            status_code=503,
-            content={"status": "error", "message": "Model not initialized"}
-        )
     return {"status": "ok", "model_loaded": True}
 # Run the FastAPI app
 if __name__ == "__main__":
     import uvicorn
-    port = int(os.environ.get("PORT", 7860))  # Default to port 7860
     uvicorn.run("main:app", host="0.0.0.0", port=port, reload=False)

 MODEL = None
 PROCESSOR = None
 UPLOAD_DIR = os.path.join(tempfile.gettempdir(), "quran_comparison_uploads")
 os.makedirs(UPLOAD_DIR, exist_ok=True)
 # Response models
     initialize_model()
     yield
+# Create the FastAPI app with the lifespan handler and add CORS middleware
 app = FastAPI(
     title="Quran Recitation Comparison API",
     description="API for comparing similarity between Quran recitations using Wav2Vec2 embeddings",
     """Load and preprocess an audio file."""
     try:
         y, sr = librosa.load(file_path, sr=target_sr)
         if normalize:
             y = librosa.util.normalize(y)
         if trim_silence:
             y, _ = librosa.effects.trim(y, top_db=30)
         return y
     except Exception as e:
         raise HTTPException(status_code=400, detail=f"Error loading audio: {e}")
 def get_deep_embedding(audio, sr=16000):
     """Extract frame-wise deep embeddings using the pretrained model."""
     global MODEL, PROCESSOR
     if MODEL is None or PROCESSOR is None:
         raise HTTPException(status_code=500, detail="Model not initialized")
     try:
         device = next(MODEL.parameters()).device
         input_values = PROCESSOR(
         hidden_states = outputs.hidden_states[-1]
         embedding_seq = hidden_states.squeeze(0).cpu().numpy()
         return embedding_seq
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Error extracting embeddings: {e}")
+# Custom DTW implementation to avoid issues with librosa's dtw
 def custom_dtw(X, Y, metric='euclidean'):
     """
+    Custom implementation of DTW.
+    X and Y are expected to be 2D numpy arrays.
     """
+    # Check inputs are 2D and non-empty
+    if X.ndim != 2 or Y.ndim != 2:
+        raise ValueError("Input features must be 2D arrays.")
+    if X.shape[1] == 0 or Y.shape[1] == 0:
+        raise ValueError("Empty embedding sequence encountered.")
     n, m = len(X[0]), len(Y[0])
     D = np.zeros((n+1, m+1))
     D[0, :] = np.inf
             elif metric == 'cosine':
                 cost = 1 - np.dot(X[:, i-1], Y[:, j-1]) / (np.linalg.norm(X[:, i-1]) * np.linalg.norm(Y[:, j-1]))
             else:
+                cost = np.sum(np.abs(X[:, i-1] - Y[:, j-1]))
             D[i, j] = cost + min(D[i-1, j], D[i, j-1], D[i-1, j-1])
     i, j = n, m
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Error computing DTW distance: {e}")
+# Interpret similarity based on the normalized distance
 def interpret_similarity(norm_distance):
     if norm_distance == 0:
         result = "The recitations are identical based on the deep embeddings."
         score = 100
     else:
         result = "The recitations are quite different."
         score = max(0, 100 - norm_distance)
     return result, score
 # Clean up temporary files
 def cleanup_temp_files(file_paths):
     for file_path in file_paths:
         if os.path.exists(file_path):
             try:
     file1: UploadFile = File(...),
     file2: UploadFile = File(...)
 ):
     temp_file1 = os.path.join(UPLOAD_DIR, f"{uuid.uuid4()}.wav")
     temp_file2 = os.path.join(UPLOAD_DIR, f"{uuid.uuid4()}.wav")
     try:
+        # Save uploaded files to temporary locations
         with open(temp_file1, "wb") as f:
             shutil.copyfileobj(file1.file, f)
         with open(temp_file2, "wb") as f:
             shutil.copyfileobj(file2.file, f)
+        # Load audio files
         audio1 = load_audio(temp_file1)
         audio2 = load_audio(temp_file2)
+        # Extract embeddings
         embedding1 = get_deep_embedding(audio1)
         embedding2 = get_deep_embedding(audio2)
+        # Compute DTW distance (transpose so each column represents a frame)
         norm_distance = compute_dtw_distance(embedding1.T, embedding2.T)
         interpretation, similarity_score = interpret_similarity(norm_distance)
         background_tasks.add_task(cleanup_temp_files, [temp_file1, temp_file2])
+        return {"similarity_score": similarity_score, "interpretation": interpretation}
+    except HTTPException as he:
+        background_tasks.add_task(cleanup_temp_files, [temp_file1, temp_file2])
+        raise he
     except Exception as e:
         background_tasks.add_task(cleanup_temp_files, [temp_file1, temp_file2])
+        print(f"Unexpected error in /compare: {e}")
+        raise HTTPException(status_code=500, detail="An unexpected error occurred during comparison.")
 # Health check endpoint
 @app.get("/health")
 async def health_check():
     if MODEL is None or PROCESSOR is None:
+        return JSONResponse(status_code=503, content={"status": "error", "message": "Model not initialized"})
     return {"status": "ok", "model_loaded": True}
 # Run the FastAPI app
 if __name__ == "__main__":
     import uvicorn
+    port = int(os.environ.get("PORT", 7860))
     uvicorn.run("main:app", host="0.0.0.0", port=port, reload=False)