abhisheksan committed on
Commit
05bf773
·
1 Parent(s): b4b3464

Refactor main.py to remove unused import and disable JIT

Browse files
app/main.py CHANGED
@@ -1,13 +1,11 @@
1
 
2
- from fastapi import FastAPI, Request, Response
3
  from fastapi.responses import JSONResponse
4
  from app.api.routes import router
5
  from app.core.logging_config import configure_logging
6
  from app.core.firebase_config import initialize_firebase
7
  from app.api.forgery_routes import router as forgery_router
8
  import logging
9
- import numba
10
- numba.config.DISABLE_JIT = True
11
 
12
  app = FastAPI()
13
 
 
1
 
2
+ from fastapi import FastAPI, Request
3
  from fastapi.responses import JSONResponse
4
  from app.api.routes import router
5
  from app.core.logging_config import configure_logging
6
  from app.core.firebase_config import initialize_firebase
7
  from app.api.forgery_routes import router as forgery_router
8
  import logging
 
 
9
 
10
  app = FastAPI()
11
 
app/services/audio_deepfake_service.py CHANGED
@@ -1,6 +1,8 @@
 
1
  import numpy as np
2
  import librosa as lb
3
  from tensorflow.keras.models import load_model
 
4
  from app.utils.file_utils import get_file_content
5
  import io
6
  import logging
@@ -13,34 +15,47 @@ class AudioDeepfakeService:
13
 
14
  def create_mel_spectrogram_sample(self, audio_content, sr=22050, sample_time=1.5, n_mels=64):
15
  logging.info("Creating mel spectrogram sample")
16
- y, sr = lb.load(io.BytesIO(audio_content), sr=sr)
17
- logging.info(f"Audio loaded with sample rate: {sr}")
18
-
19
- sample_length = int(sr * sample_time)
20
- if len(y) < sample_length:
21
- logging.warning("Audio file is too short")
22
- raise ValueError("Audio file is too short")
23
-
24
- start = 0
25
- end = start + sample_length
26
- m = lb.feature.melspectrogram(y=y[start:end], sr=sr, n_mels=n_mels)
27
- m = np.abs(m)
28
- m /= 80
29
- logging.info("Mel spectrogram sample created successfully")
30
- return np.expand_dims(m, axis=-1)
 
 
 
 
 
 
31
 
32
  def detect_deepfake(self, firebase_filename):
33
  logging.info(f"Detecting deepfake for audio file: {firebase_filename}")
34
  try:
35
  audio_content = get_file_content(firebase_filename)
36
- logging.info("Audio content retrieved successfully")
 
37
  sample = self.create_mel_spectrogram_sample(audio_content)
 
 
 
 
38
  logging.info("Mel spectrogram sample created")
39
  prediction = self.model.predict(np.expand_dims(sample, axis=0))[0][0]
40
  logging.info(f"Raw prediction: {prediction}")
41
 
42
- result = "Fake" if prediction > 0.5 else "Real"
43
- confidence = prediction if prediction > 0.5 else 1 - prediction
 
 
44
 
45
  result_dict = {
46
  "prediction": result,
@@ -52,4 +67,5 @@ class AudioDeepfakeService:
52
 
53
  except Exception as e:
54
  logging.error(f"Error processing audio: {str(e)}")
55
- raise ValueError(f"Error processing audio: {str(e)}")
 
 
1
+
2
  import numpy as np
3
  import librosa as lb
4
  from tensorflow.keras.models import load_model
5
+ import traceback
6
  from app.utils.file_utils import get_file_content
7
  import io
8
  import logging
 
15
 
16
  def create_mel_spectrogram_sample(self, audio_content, sr=22050, sample_time=1.5, n_mels=64):
17
  logging.info("Creating mel spectrogram sample")
18
+ try:
19
+ y, sr = lb.load(io.BytesIO(audio_content), sr=sr)
20
+ logging.info(f"Audio loaded with sample rate: {sr}, length: {len(y)}")
21
+
22
+ sample_length = int(sr * sample_time)
23
+ if len(y) < sample_length:
24
+ logging.warning(f"Audio file is too short. Padding from {len(y)} to {sample_length}")
25
+ y = np.pad(y, (0, sample_length - len(y)), mode='constant')
26
+
27
+ start = 0
28
+ end = start + sample_length
29
+ m = lb.feature.melspectrogram(y=y[start:end], sr=sr, n_mels=n_mels)
30
+ m = np.abs(m)
31
+ m = lb.power_to_db(m, ref=np.max) # Convert to dB scale
32
+ m = (m - m.min()) / (m.max() - m.min()) # Normalize to [0, 1]
33
+ logging.info("Mel spectrogram sample created successfully")
34
+ return np.expand_dims(m, axis=-1)
35
+ except Exception as e:
36
+ logging.error(f"Error creating mel spectrogram: {str(e)}")
37
+ logging.error(traceback.format_exc())
38
+ return None
39
 
40
  def detect_deepfake(self, firebase_filename):
41
  logging.info(f"Detecting deepfake for audio file: {firebase_filename}")
42
  try:
43
  audio_content = get_file_content(firebase_filename)
44
+ logging.info(f"Audio content retrieved successfully, size: {len(audio_content)} bytes")
45
+
46
  sample = self.create_mel_spectrogram_sample(audio_content)
47
+ if sample is None:
48
+ logging.error("Failed to create mel spectrogram sample")
49
+ return {"prediction": "Error", "confidence": 0.0, "raw_prediction": 0.0}
50
+
51
  logging.info("Mel spectrogram sample created")
52
  prediction = self.model.predict(np.expand_dims(sample, axis=0))[0][0]
53
  logging.info(f"Raw prediction: {prediction}")
54
 
55
+ is_fake = prediction > 0.5
56
+ confidence = prediction if is_fake else 1 - prediction
57
+
58
+ result = "Fake" if is_fake else "Real"
59
 
60
  result_dict = {
61
  "prediction": result,
 
67
 
68
  except Exception as e:
69
  logging.error(f"Error processing audio: {str(e)}")
70
+ logging.error(traceback.format_exc())
71
+ return {"prediction": "Error", "confidence": 0.0, "raw_prediction": 0.0}
app/utils/forgery_video_utils.py CHANGED
@@ -2,10 +2,12 @@ import av
2
  import numpy as np
3
  from PIL import Image
4
  import io
 
5
  from app.utils.file_utils import get_file_content, upload_file_to_firebase, remove_temp_file
6
  import logging
7
  import uuid
8
  from typing import List, Tuple
 
9
 
10
  async def extract_audio(firebase_filename: str) -> str:
11
  try:
@@ -17,28 +19,72 @@ async def extract_audio(firebase_filename: str) -> str:
17
  logging.warning(f"No audio stream found in {firebase_filename}")
18
  return None
19
 
20
- output_container = av.open(io.BytesIO(), mode='w', format='wav')
 
 
 
 
 
 
21
  output_stream = output_container.add_stream('pcm_s16le', rate=audio_stream.rate)
22
 
 
23
  for frame in input_container.decode(audio_stream):
 
24
  for packet in output_stream.encode(frame):
25
  output_container.mux(packet)
26
 
 
 
27
  # Flush the stream
28
  for packet in output_stream.encode(None):
29
  output_container.mux(packet)
30
 
31
  output_container.close()
32
 
33
- audio_content = output_container.data.getvalue()
 
 
 
 
 
 
 
34
  audio_filename = f"{firebase_filename}_audio.wav"
35
  await upload_file_to_firebase(audio_content, audio_filename)
36
 
 
37
  return audio_filename
38
  except Exception as e:
39
  logging.error(f"Error extracting audio: {str(e)}")
 
40
  return None
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  async def extract_frames(firebase_filename: str, max_frames: int = 10) -> List[str]:
43
  frames = []
44
  video_content = get_file_content(firebase_filename)
@@ -69,17 +115,6 @@ async def extract_frames(firebase_filename: str, max_frames: int = 10) -> List[s
69
 
70
  return frames
71
 
72
- import av
73
- import numpy as np
74
- from PIL import Image
75
- import io
76
- from app.utils.file_utils import get_file_content, upload_file_to_firebase, remove_temp_file
77
- import logging
78
- import uuid
79
- from typing import List, Tuple
80
-
81
- # ... (previous functions remain unchanged)
82
-
83
  async def compress_and_process_video(firebase_filename: str, target_size_mb: int = 50, max_duration: int = 60) -> str:
84
  video_content = get_file_content(firebase_filename)
85
 
@@ -120,21 +155,25 @@ async def compress_and_process_video(firebase_filename: str, target_size_mb: int
120
 
121
  if audio_stream:
122
  output_audio_stream = output_container.add_stream('aac', rate=audio_stream.rate)
123
- output_audio_stream.bit_rate = 128000 # 128k bitrate for audio
124
-
125
- for frame in input_container.decode(video=0):
126
- if frame.time > duration:
127
- break
128
- new_frame = frame.reformat(width=new_width, height=new_height, format='yuv420p')
129
- for packet in output_video_stream.encode(new_frame):
130
- output_container.mux(packet)
131
-
132
- if audio_stream:
133
- for frame in input_container.decode(audio=0):
134
- if frame.time > duration:
135
- break
136
- for packet in output_audio_stream.encode(frame):
137
- output_container.mux(packet)
 
 
 
 
138
 
139
  # Flush streams
140
  for packet in output_video_stream.encode(None):
@@ -151,8 +190,10 @@ async def compress_and_process_video(firebase_filename: str, target_size_mb: int
151
  output_filename = f"{firebase_filename}_compressed.mp4"
152
  await upload_file_to_firebase(compressed_content, output_filename)
153
 
 
154
  return output_filename
155
 
156
  except Exception as e:
157
  logging.error(f"Error compressing and processing video: {str(e)}")
 
158
  raise
 
2
  import numpy as np
3
  from PIL import Image
4
  import io
5
+ import traceback
6
  from app.utils.file_utils import get_file_content, upload_file_to_firebase, remove_temp_file
7
  import logging
8
  import uuid
9
  from typing import List, Tuple
10
+ import librosa
11
 
12
  async def extract_audio(firebase_filename: str) -> str:
13
  try:
 
19
  logging.warning(f"No audio stream found in {firebase_filename}")
20
  return None
21
 
22
+ logging.info(f"Audio stream found: {audio_stream}")
23
+ logging.info(f"Audio codec: {audio_stream.codec_context.name}")
24
+ logging.info(f"Audio sample rate: {audio_stream.rate}")
25
+ logging.info(f"Audio bit rate: {audio_stream.bit_rate}")
26
+
27
+ output_buffer = io.BytesIO()
28
+ output_container = av.open(output_buffer, mode='w', format='wav')
29
  output_stream = output_container.add_stream('pcm_s16le', rate=audio_stream.rate)
30
 
31
+ frame_count = 0
32
  for frame in input_container.decode(audio_stream):
33
+ frame_count += 1
34
  for packet in output_stream.encode(frame):
35
  output_container.mux(packet)
36
 
37
+ logging.info(f"Processed {frame_count} audio frames")
38
+
39
  # Flush the stream
40
  for packet in output_stream.encode(None):
41
  output_container.mux(packet)
42
 
43
  output_container.close()
44
 
45
+ audio_content = output_buffer.getvalue()
46
+ audio_size = len(audio_content)
47
+ logging.info(f"Extracted audio size: {audio_size} bytes")
48
+
49
+ if audio_size < 1024: # Check if audio content is too small (less than 1KB)
50
+ logging.warning(f"Extracted audio is too short for {firebase_filename}")
51
+ return None
52
+
53
  audio_filename = f"{firebase_filename}_audio.wav"
54
  await upload_file_to_firebase(audio_content, audio_filename)
55
 
56
+ logging.info(f"Audio extracted and uploaded: {audio_filename}")
57
  return audio_filename
58
  except Exception as e:
59
  logging.error(f"Error extracting audio: {str(e)}")
60
+ logging.error(traceback.format_exc())
61
  return None
62
 
63
+ def detect_speech(audio_content: bytes) -> bool:
64
+ try:
65
+ y, sr = librosa.load(io.BytesIO(audio_content), sr=None)
66
+ logging.info(f"Loaded audio with sample rate: {sr}, length: {len(y)}")
67
+
68
+ # Calculate the root mean square energy
69
+ rms = librosa.feature.rms(y=y)[0]
70
+
71
+ # Calculate the percentage of frames with energy above a threshold
72
+ threshold = 0.01 # Adjust this value based on your needs
73
+ speech_frames = np.sum(rms > threshold)
74
+ speech_percentage = speech_frames / len(rms)
75
+
76
+ logging.info(f"Speech detection: {speech_percentage:.2%} of frames above threshold")
77
+
78
+ # If more than 10% of frames have energy above the threshold, consider it speech
79
+ is_speech = speech_percentage > 0.1
80
+ logging.info(f"Speech detected: {is_speech}")
81
+
82
+ return is_speech
83
+ except Exception as e:
84
+ logging.error(f"Error detecting speech: {str(e)}")
85
+ logging.error(traceback.format_exc())
86
+ return False
87
+
88
  async def extract_frames(firebase_filename: str, max_frames: int = 10) -> List[str]:
89
  frames = []
90
  video_content = get_file_content(firebase_filename)
 
115
 
116
  return frames
117
 
 
 
 
 
 
 
 
 
 
 
 
118
  async def compress_and_process_video(firebase_filename: str, target_size_mb: int = 50, max_duration: int = 60) -> str:
119
  video_content = get_file_content(firebase_filename)
120
 
 
155
 
156
  if audio_stream:
157
  output_audio_stream = output_container.add_stream('aac', rate=audio_stream.rate)
158
+ output_audio_stream.bit_rate = min(128000, audio_stream.bit_rate or 128000) # 128k bitrate for audio, or lower if original is lower
159
+
160
+ for packet in input_container.demux((video_stream, audio_stream) if audio_stream else (video_stream,)):
161
+ if packet.dts is None:
162
+ continue
163
+
164
+ if packet.stream.type == 'video':
165
+ for frame in packet.decode():
166
+ if frame.time > duration:
167
+ break
168
+ new_frame = frame.reformat(width=new_width, height=new_height, format='yuv420p')
169
+ for packet in output_video_stream.encode(new_frame):
170
+ output_container.mux(packet)
171
+ elif packet.stream.type == 'audio' and audio_stream:
172
+ for frame in packet.decode():
173
+ if frame.time > duration:
174
+ break
175
+ for packet in output_audio_stream.encode(frame):
176
+ output_container.mux(packet)
177
 
178
  # Flush streams
179
  for packet in output_video_stream.encode(None):
 
190
  output_filename = f"{firebase_filename}_compressed.mp4"
191
  await upload_file_to_firebase(compressed_content, output_filename)
192
 
193
+ logging.info(f"Compressed video uploaded to Firebase: {output_filename}")
194
  return output_filename
195
 
196
  except Exception as e:
197
  logging.error(f"Error compressing and processing video: {str(e)}")
198
+ logging.error(traceback.format_exc())
199
  raise