mgbam committed on
Commit
cc3065b
·
verified ·
1 Parent(s): 896af9f

Update pipelines/ai_inference.py

Browse files
Files changed (1) hide show
  1. pipelines/ai_inference.py +27 -3
pipelines/ai_inference.py CHANGED
@@ -3,21 +3,45 @@ import whisper
3
  import json
4
 
5
  def transcribe_audio(audio_file):
6
- # Option A: local Whisper
 
 
 
 
 
 
7
  model = whisper.load_model("base")
 
8
  result = model.transcribe(audio_file)
 
9
  return result["text"]
10
 
11
  def generate_edit_instructions(transcript_text):
12
- system_msg = """You are a video editing assistant..."""
 
 
 
 
 
 
 
 
 
 
 
 
13
  user_msg = f"Transcript:\n{transcript_text}\n\nOutput instructions in JSON..."
14
 
15
- # GPT-based approach
16
  response = openai.ChatCompletion.create(
17
  model="gpt-4",
18
  messages=[
19
  {"role": "system", "content": system_msg},
20
  {"role": "user", "content": user_msg}
21
  ],
 
 
22
  )
 
 
23
  return response.choices[0].message["content"]
 
3
  import json
4
 
5
  def transcribe_audio(audio_file):
6
+ """
7
+ Transcribes the given audio file using the local Whisper model.
8
+
9
+ :param audio_file: Path to the audio file (e.g., WAV) to transcribe.
10
+ :return: The transcribed text as a string.
11
+ """
12
+ # Load the 'base' Whisper model (you can also use 'small', 'medium', 'large', etc.)
13
  model = whisper.load_model("base")
14
+ # Perform transcription
15
  result = model.transcribe(audio_file)
16
+ # Return only the text portion
17
  return result["text"]
18
 
19
  def generate_edit_instructions(transcript_text):
20
+ """
21
+ Sends the transcript to GPT-4 for video editing instructions in JSON format.
22
+
23
+ :param transcript_text: The raw transcript text from Whisper.
24
+ :return: A string containing GPT-4's response, typically expected to be valid JSON or structured text.
25
+ """
26
+ system_msg = (
27
+ "You are a video editing assistant. Your task is to parse the following transcript and "
28
+ "provide a list of editing instructions in JSON format. "
29
+ "Instructions may include timecodes for removing filler words, suggestions for B-roll, "
30
+ "and recommended transitions."
31
+ )
32
+
33
  user_msg = f"Transcript:\n{transcript_text}\n\nOutput instructions in JSON..."
34
 
35
+ # Call the OpenAI API for GPT-based analysis
36
  response = openai.ChatCompletion.create(
37
  model="gpt-4",
38
  messages=[
39
  {"role": "system", "content": system_msg},
40
  {"role": "user", "content": user_msg}
41
  ],
42
+ temperature=0.7,
43
+ max_tokens=500
44
  )
45
+
46
+ # Extract the AI's output from the first choice
47
  return response.choices[0].message["content"]