Spaces:

abhisheksan
/

credify

Running

App Files Files Community

abhisheksan committed on Sep 29, 2024

Commit

05bf773

1 Parent(s): b4b3464

Refactor main.py to remove the unused import and the numba DISABLE_JIT override

Browse files

Files changed (3) hide show

app/main.py +1 -3
app/services/audio_deepfake_service.py +35 -19
app/utils/forgery_video_utils.py +69 -28

app/main.py CHANGED Viewed

@@ -1,13 +1,11 @@
-from fastapi import FastAPI, Request, Response
 from fastapi.responses import JSONResponse
 from app.api.routes import router
 from app.core.logging_config import configure_logging
 from app.core.firebase_config import initialize_firebase
 from app.api.forgery_routes import router as forgery_router
 import logging
-import numba
-numba.config.DISABLE_JIT = True
 app = FastAPI()

+from fastapi import FastAPI, Request
 from fastapi.responses import JSONResponse
 from app.api.routes import router
 from app.core.logging_config import configure_logging
 from app.core.firebase_config import initialize_firebase
 from app.api.forgery_routes import router as forgery_router
 import logging
 app = FastAPI()

app/services/audio_deepfake_service.py CHANGED Viewed

@@ -1,6 +1,8 @@
 import numpy as np
 import librosa as lb
 from tensorflow.keras.models import load_model
 from app.utils.file_utils import get_file_content
 import io
 import logging
@@ -13,34 +15,47 @@ class AudioDeepfakeService:
     def create_mel_spectrogram_sample(self, audio_content, sr=22050, sample_time=1.5, n_mels=64):
         logging.info("Creating mel spectrogram sample")
-        y, sr = lb.load(io.BytesIO(audio_content), sr=sr)
-        logging.info(f"Audio loaded with sample rate: {sr}")
-        sample_length = int(sr * sample_time)
-        if len(y) < sample_length:
-            logging.warning("Audio file is too short")
-            raise ValueError("Audio file is too short")
-        start = 0
-        end = start + sample_length
-        m = lb.feature.melspectrogram(y=y[start:end], sr=sr, n_mels=n_mels)
-        m = np.abs(m)
-        m /= 80
-        logging.info("Mel spectrogram sample created successfully")
-        return np.expand_dims(m, axis=-1)
     def detect_deepfake(self, firebase_filename):
         logging.info(f"Detecting deepfake for audio file: {firebase_filename}")
         try:
             audio_content = get_file_content(firebase_filename)
-            logging.info("Audio content retrieved successfully")
             sample = self.create_mel_spectrogram_sample(audio_content)
             logging.info("Mel spectrogram sample created")
             prediction = self.model.predict(np.expand_dims(sample, axis=0))[0][0]
             logging.info(f"Raw prediction: {prediction}")
-            result = "Fake" if prediction > 0.5 else "Real"
-            confidence = prediction if prediction > 0.5 else 1 - prediction
             result_dict = {
                 "prediction": result,
@@ -52,4 +67,5 @@ class AudioDeepfakeService:
         except Exception as e:
             logging.error(f"Error processing audio: {str(e)}")
-            raise ValueError(f"Error processing audio: {str(e)}")

 import numpy as np
 import librosa as lb
 from tensorflow.keras.models import load_model
+import traceback
 from app.utils.file_utils import get_file_content
 import io
 import logging
     def create_mel_spectrogram_sample(self, audio_content, sr=22050, sample_time=1.5, n_mels=64):
+        """Build a normalized mel-spectrogram sample from raw audio bytes.
+
+        Args:
+            audio_content: Encoded audio bytes; wrapped in ``io.BytesIO`` and
+                decoded with ``librosa.load``.
+            sr: Sample rate requested from ``librosa.load``.
+            sample_time: Length, in seconds, of the leading window that is analysed.
+            n_mels: Number of mel bands passed to ``melspectrogram``.
+
+        Returns:
+            The dB-scaled mel spectrogram, min-max normalized to [0, 1], with a
+            trailing axis appended; or ``None`` if any step fails (the error and
+            traceback are logged).
+        """
         logging.info("Creating mel spectrogram sample")
+        try:
+            y, sr = lb.load(io.BytesIO(audio_content), sr=sr)
+            logging.info(f"Audio loaded with sample rate: {sr}, length: {len(y)}")
+            sample_length = int(sr * sample_time)
+            if len(y) < sample_length:
+                # Zero-pad short clips so the analysed window always has
+                # exactly sample_length samples.
+                logging.warning(f"Audio file is too short. Padding from {len(y)} to {sample_length}")
+                y = np.pad(y, (0, sample_length - len(y)), mode='constant')
+            m = lb.feature.melspectrogram(y=y[:sample_length], sr=sr, n_mels=n_mels)
+            m = np.abs(m)
+            m = lb.power_to_db(m, ref=np.max)  # Convert to dB scale
+            m_min = m.min()
+            m_range = m.max() - m_min
+            if m_range > 0:
+                m = (m - m_min) / m_range  # Normalize to [0, 1]
+            else:
+                # Silent or constant audio has zero dynamic range; dividing
+                # would give 0/0 = NaN and feed NaNs to the model, so emit an
+                # all-zero spectrogram instead.
+                m = np.zeros_like(m)
+            logging.info("Mel spectrogram sample created successfully")
+            return np.expand_dims(m, axis=-1)
+        except Exception as e:
+            # Best-effort by design: callers treat None as "could not build a sample".
+            logging.error(f"Error creating mel spectrogram: {str(e)}")
+            logging.error(traceback.format_exc())
+            return None
     def detect_deepfake(self, firebase_filename):
         logging.info(f"Detecting deepfake for audio file: {firebase_filename}")
         try:
             audio_content = get_file_content(firebase_filename)
+            logging.info(f"Audio content retrieved successfully, size: {len(audio_content)} bytes")
             sample = self.create_mel_spectrogram_sample(audio_content)
+            if sample is None:
+                logging.error("Failed to create mel spectrogram sample")
+                return {"prediction": "Error", "confidence": 0.0, "raw_prediction": 0.0}
             logging.info("Mel spectrogram sample created")
             prediction = self.model.predict(np.expand_dims(sample, axis=0))[0][0]
             logging.info(f"Raw prediction: {prediction}")
+            is_fake = prediction > 0.5
+            confidence = prediction if is_fake else 1 - prediction
+            result = "Fake" if is_fake else "Real"
             result_dict = {
                 "prediction": result,
         except Exception as e:
             logging.error(f"Error processing audio: {str(e)}")
+            logging.error(traceback.format_exc())
+            return {"prediction": "Error", "confidence": 0.0, "raw_prediction": 0.0}

app/utils/forgery_video_utils.py CHANGED Viewed

@@ -2,10 +2,12 @@ import av
 import numpy as np
 from PIL import Image
 import io
 from app.utils.file_utils import get_file_content, upload_file_to_firebase, remove_temp_file
 import logging
 import uuid
 from typing import List, Tuple
 async def extract_audio(firebase_filename: str) -> str:
     try:
@@ -17,28 +19,72 @@ async def extract_audio(firebase_filename: str) -> str:
             logging.warning(f"No audio stream found in {firebase_filename}")
             return None
-        output_container = av.open(io.BytesIO(), mode='w', format='wav')
         output_stream = output_container.add_stream('pcm_s16le', rate=audio_stream.rate)
         for frame in input_container.decode(audio_stream):
             for packet in output_stream.encode(frame):
                 output_container.mux(packet)
         # Flush the stream
         for packet in output_stream.encode(None):
             output_container.mux(packet)
         output_container.close()
-        audio_content = output_container.data.getvalue()
         audio_filename = f"{firebase_filename}_audio.wav"
         await upload_file_to_firebase(audio_content, audio_filename)
         return audio_filename
     except Exception as e:
         logging.error(f"Error extracting audio: {str(e)}")
     return None
 async def extract_frames(firebase_filename: str, max_frames: int = 10) -> List[str]:
     frames = []
     video_content = get_file_content(firebase_filename)
@@ -69,17 +115,6 @@ async def extract_frames(firebase_filename: str, max_frames: int = 10) -> List[s
     return frames
-import av
-import numpy as np
-from PIL import Image
-import io
-from app.utils.file_utils import get_file_content, upload_file_to_firebase, remove_temp_file
-import logging
-import uuid
-from typing import List, Tuple
-# ... (previous functions remain unchanged)
 async def compress_and_process_video(firebase_filename: str, target_size_mb: int = 50, max_duration: int = 60) -> str:
     video_content = get_file_content(firebase_filename)
@@ -120,21 +155,25 @@ async def compress_and_process_video(firebase_filename: str, target_size_mb: int
         if audio_stream:
             output_audio_stream = output_container.add_stream('aac', rate=audio_stream.rate)
-            output_audio_stream.bit_rate = 128000  # 128k bitrate for audio
-        for frame in input_container.decode(video=0):
-            if frame.time > duration:
-                break
-            new_frame = frame.reformat(width=new_width, height=new_height, format='yuv420p')
-            for packet in output_video_stream.encode(new_frame):
-                output_container.mux(packet)
-        if audio_stream:
-            for frame in input_container.decode(audio=0):
-                if frame.time > duration:
-                    break
-                for packet in output_audio_stream.encode(frame):
-                    output_container.mux(packet)
         # Flush streams
         for packet in output_video_stream.encode(None):
@@ -151,8 +190,10 @@ async def compress_and_process_video(firebase_filename: str, target_size_mb: int
         output_filename = f"{firebase_filename}_compressed.mp4"
         await upload_file_to_firebase(compressed_content, output_filename)
         return output_filename
     except Exception as e:
         logging.error(f"Error compressing and processing video: {str(e)}")
         raise

 import numpy as np
 from PIL import Image
 import io
+import traceback
 from app.utils.file_utils import get_file_content, upload_file_to_firebase, remove_temp_file
 import logging
 import uuid
 from typing import List, Tuple
+import librosa
 async def extract_audio(firebase_filename: str) -> str:
     try:
             logging.warning(f"No audio stream found in {firebase_filename}")
             return None
+        logging.info(f"Audio stream found: {audio_stream}")
+        logging.info(f"Audio codec: {audio_stream.codec_context.name}")
+        logging.info(f"Audio sample rate: {audio_stream.rate}")
+        logging.info(f"Audio bit rate: {audio_stream.bit_rate}")
+        output_buffer = io.BytesIO()
+        output_container = av.open(output_buffer, mode='w', format='wav')
         output_stream = output_container.add_stream('pcm_s16le', rate=audio_stream.rate)
+        frame_count = 0
         for frame in input_container.decode(audio_stream):
+            frame_count += 1
             for packet in output_stream.encode(frame):
                 output_container.mux(packet)
+        logging.info(f"Processed {frame_count} audio frames")
         # Flush the stream
         for packet in output_stream.encode(None):
             output_container.mux(packet)
         output_container.close()
+        audio_content = output_buffer.getvalue()
+        audio_size = len(audio_content)
+        logging.info(f"Extracted audio size: {audio_size} bytes")
+        if audio_size < 1024:  # Check if audio content is too small (less than 1KB)
+            logging.warning(f"Extracted audio is too short for {firebase_filename}")
+            return None
         audio_filename = f"{firebase_filename}_audio.wav"
         await upload_file_to_firebase(audio_content, audio_filename)
+        logging.info(f"Audio extracted and uploaded: {audio_filename}")
         return audio_filename
     except Exception as e:
         logging.error(f"Error extracting audio: {str(e)}")
+        logging.error(traceback.format_exc())
     return None
+def detect_speech(audio_content: bytes, threshold: float = 0.01, min_speech_ratio: float = 0.1) -> bool:
+    """Heuristically decide whether audio contains speech, based on RMS energy.
+
+    Args:
+        audio_content: Encoded audio bytes; decoded with ``librosa.load`` at
+            the file's native sample rate (``sr=None``).
+        threshold: RMS energy a frame must exceed to count as active.
+        min_speech_ratio: Fraction of active frames that must be exceeded for
+            the audio to be considered speech.
+
+    Returns:
+        ``True`` if more than ``min_speech_ratio`` of the frames exceed
+        ``threshold``; ``False`` otherwise, including when the audio cannot be
+        decoded or analysed (the error and traceback are logged).
+    """
+    try:
+        y, sr = librosa.load(io.BytesIO(audio_content), sr=None)
+        logging.info(f"Loaded audio with sample rate: {sr}, length: {len(y)}")
+        # Calculate the root mean square energy
+        rms = librosa.feature.rms(y=y)[0]
+        if len(rms) == 0:
+            # No frames to analyse; avoid dividing by zero below.
+            logging.warning("No audio frames available for speech detection")
+            return False
+        # Calculate the percentage of frames with energy above the threshold
+        speech_frames = np.sum(rms > threshold)
+        speech_percentage = speech_frames / len(rms)
+        logging.info(f"Speech detection: {speech_percentage:.2%} of frames above threshold")
+        # Convert numpy.bool_ to a builtin bool so the result matches the
+        # annotation and stays JSON-serializable for callers.
+        is_speech = bool(speech_percentage > min_speech_ratio)
+        logging.info(f"Speech detected: {is_speech}")
+        return is_speech
+    except Exception as e:
+        # Best-effort by design: any decode/analysis failure means "no speech".
+        logging.error(f"Error detecting speech: {str(e)}")
+        logging.error(traceback.format_exc())
+        return False
 async def extract_frames(firebase_filename: str, max_frames: int = 10) -> List[str]:
     frames = []
     video_content = get_file_content(firebase_filename)
     return frames
 async def compress_and_process_video(firebase_filename: str, target_size_mb: int = 50, max_duration: int = 60) -> str:
     video_content = get_file_content(firebase_filename)
         if audio_stream:
             output_audio_stream = output_container.add_stream('aac', rate=audio_stream.rate)
+            output_audio_stream.bit_rate = min(128000, audio_stream.bit_rate or 128000)  # 128k bitrate for audio, or lower if original is lower
+        for packet in input_container.demux((video_stream, audio_stream) if audio_stream else (video_stream,)):
+            if packet.dts is None:
+                continue
+            if packet.stream.type == 'video':
+                for frame in packet.decode():
+                    if frame.time > duration:
+                        break
+                    new_frame = frame.reformat(width=new_width, height=new_height, format='yuv420p')
+                    for packet in output_video_stream.encode(new_frame):
+                        output_container.mux(packet)
+            elif packet.stream.type == 'audio' and audio_stream:
+                for frame in packet.decode():
+                    if frame.time > duration:
+                        break
+                    for packet in output_audio_stream.encode(frame):
+                        output_container.mux(packet)
         # Flush streams
         for packet in output_video_stream.encode(None):
         output_filename = f"{firebase_filename}_compressed.mp4"
         await upload_file_to_firebase(compressed_content, output_filename)
+        logging.info(f"Compressed video uploaded to Firebase: {output_filename}")
         return output_filename
     except Exception as e:
         logging.error(f"Error compressing and processing video: {str(e)}")
+        logging.error(traceback.format_exc())
         raise