Spaces:

Hammad712
/

recitation-compare

Sleeping

App Files Files Community

Hammad712 committed 9 days ago

Commit

612c535

verified ·

1 Parent(s): 51b44fe

Update main.py

Browse files

Files changed (1) hide show

main.py +28 -21

main.py CHANGED Viewed

@@ -4,6 +4,7 @@ from fastapi import FastAPI, UploadFile, File
 import uvicorn
 import torch
 import librosa
 from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
 from librosa.sequence import dtw
 from google import genai
@@ -11,17 +12,9 @@ from google.genai import types
 app = FastAPI()
-# ---------------------------
-# Gemini-based Comparison API
-# ---------------------------
-# Retrieve the GenAI API key from the environment variable.
-genai_api_key = os.getenv("GENAI_API_KEY")
-if not genai_api_key:
-    raise EnvironmentError("GENAI_API_KEY environment variable not set")
-# Initialize the GenAI client.
-client = genai.Client(api_key=genai_api_key)
 # ---------------------------
 # DTW-based Comparison Class
@@ -30,7 +23,7 @@ class QuranRecitationComparer:
     def __init__(self, model_name="jonatasgrosman/wav2vec2-large-xlsr-53-arabic", auth_token=None):
         """Initialize the Quran recitation comparer with a specific Wav2Vec2 model."""
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        # Load model and processor once during initialization
         if auth_token:
             self.processor = Wav2Vec2Processor.from_pretrained(model_name, token=auth_token)
             self.model = Wav2Vec2ForCTC.from_pretrained(model_name, token=auth_token)
@@ -39,14 +32,19 @@ class QuranRecitationComparer:
             self.model = Wav2Vec2ForCTC.from_pretrained(model_name)
         self.model = self.model.to(self.device)
         self.model.eval()
-        # Cache for embeddings to avoid recomputation
         self.embedding_cache = {}
     def load_audio(self, file_path, target_sr=16000, trim_silence=True, normalize=True):
         """Load and preprocess an audio file."""
         if not os.path.exists(file_path):
             raise FileNotFoundError(f"Audio file not found: {file_path}")
-        y, sr = librosa.load(file_path, sr=target_sr)
         if normalize:
             y = librosa.util.normalize(y)
         if trim_silence:
@@ -121,15 +119,26 @@ class QuranRecitationComparer:
         """Clear the embedding cache to free memory."""
         self.embedding_cache = {}
-# Retrieve HuggingFace auth token from environment variable (if needed).
-hf_auth_token = os.getenv("HF_AUTH_TOKEN")
-# Initialize the comparer instance once at startup.
-comparer = QuranRecitationComparer(auth_token=hf_auth_token)
 # ---------------------------
 # API Endpoints
 # ---------------------------
 @app.get("/")
 async def root():
     return {
@@ -188,8 +197,6 @@ Provide your response with:
             )
         ]
     )
-    # Return the model's response.
     return {"result": response.text}
 @app.post("/compare-dtw")

 import uvicorn
 import torch
 import librosa
+from audioread.exceptions import NoBackendError
 from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
 from librosa.sequence import dtw
 from google import genai
 app = FastAPI()
+# Global variables to hold our loaded models/clients.
+client = None
+comparer = None
 # ---------------------------
 # DTW-based Comparison Class
     def __init__(self, model_name="jonatasgrosman/wav2vec2-large-xlsr-53-arabic", auth_token=None):
         """Initialize the Quran recitation comparer with a specific Wav2Vec2 model."""
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        # Load model and processor once during initialization.
         if auth_token:
             self.processor = Wav2Vec2Processor.from_pretrained(model_name, token=auth_token)
             self.model = Wav2Vec2ForCTC.from_pretrained(model_name, token=auth_token)
             self.model = Wav2Vec2ForCTC.from_pretrained(model_name)
         self.model = self.model.to(self.device)
         self.model.eval()
+        # Cache for embeddings to avoid recomputation.
         self.embedding_cache = {}
     def load_audio(self, file_path, target_sr=16000, trim_silence=True, normalize=True):
         """Load and preprocess an audio file."""
         if not os.path.exists(file_path):
             raise FileNotFoundError(f"Audio file not found: {file_path}")
+        try:
+            y, sr = librosa.load(file_path, sr=target_sr)
+        except NoBackendError as e:
+            raise RuntimeError(
+                "Failed to load audio using librosa. Please ensure you have a valid audio backend installed (e.g., ffmpeg)."
+            ) from e
         if normalize:
             y = librosa.util.normalize(y)
         if trim_silence:
         """Clear the embedding cache to free memory."""
         self.embedding_cache = {}
+# ---------------------------
+# Application Startup
+# ---------------------------
+@app.on_event("startup")  # NOTE(review): newer FastAPI releases deprecate on_event in favor of lifespan handlers — confirm against the pinned version.
+async def startup_event():
+    global client, comparer  # Rebind the module-level placeholders, which start out as None.
+    # Read the GenAI API key from the GENAI_API_KEY environment variable.
+    genai_api_key = os.getenv("GENAI_API_KEY")
+    if not genai_api_key:  # True for an unset variable and for an empty string.
+        raise EnvironmentError("GENAI_API_KEY environment variable not set")
+    client = genai.Client(api_key=genai_api_key)
+    # Read the optional Hugging Face token; os.getenv yields None when HF_AUTH_TOKEN is unset.
+    hf_auth_token = os.getenv("HF_AUTH_TOKEN")
+    # Build the single shared comparer here; its __init__ loads the Wav2Vec2 processor and model.
+    comparer = QuranRecitationComparer(auth_token=hf_auth_token)
 # ---------------------------
 # API Endpoints
 # ---------------------------
 @app.get("/")
 async def root():
     return {
             )
         ]
     )
     return {"result": response.text}
 @app.post("/compare-dtw")