Update pipelines/ai_inference.py
pipelines/ai_inference.py CHANGED (+27 -3)
@@ -3,21 +3,45 @@ import whisper
 import json
 
 def transcribe_audio(audio_file):
-
+    """
+    Transcribes the given audio file using the local Whisper model.
+
+    :param audio_file: Path to the audio file (e.g., WAV) to transcribe.
+    :return: The transcribed text as a string.
+    """
+    # Load the 'base' Whisper model (you can also use 'small', 'medium', 'large', etc.)
     model = whisper.load_model("base")
+    # Perform transcription
     result = model.transcribe(audio_file)
+    # Return only the text portion
     return result["text"]
 
 def generate_edit_instructions(transcript_text):
-
+    """
+    Sends the transcript to GPT-4 for video editing instructions in JSON format.
+
+    :param transcript_text: The raw transcript text from Whisper.
+    :return: A string containing GPT-4's response, typically expected to be valid JSON or structured text.
+    """
+    system_msg = (
+        "You are a video editing assistant. Your task is to parse the following transcript and "
+        "provide a list of editing instructions in JSON format. "
+        "Instructions may include timecodes for removing filler words, suggestions for B-roll, "
+        "and recommended transitions."
+    )
+
     user_msg = f"Transcript:\n{transcript_text}\n\nOutput instructions in JSON..."
 
-    # GPT-based
+    # Call the OpenAI API for GPT-based analysis
     response = openai.ChatCompletion.create(
         model="gpt-4",
         messages=[
             {"role": "system", "content": system_msg},
             {"role": "user", "content": user_msg}
         ],
+        temperature=0.7,
+        max_tokens=500
     )
+
+    # Extract the AI's output from the first choice
     return response.choices[0].message["content"]
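For orientation, a minimal sketch of how the two functions touched by this commit might be chained by a caller. The run_inference driver, the import path pipelines.ai_inference, and the JSON fallback handling are illustrative assumptions, not part of this commit; the sketch also assumes openai.api_key is configured elsewhere (e.g. via the OPENAI_API_KEY environment variable).

import json

from pipelines.ai_inference import transcribe_audio, generate_edit_instructions

def run_inference(audio_path):
    # Hypothetical driver, not part of this commit.
    # Step 1: local Whisper transcription.
    transcript = transcribe_audio(audio_path)

    # Step 2: GPT-4 editing instructions, returned as a plain string.
    raw = generate_edit_instructions(transcript)

    # The prompt asks for JSON, but the model may wrap it in extra text,
    # so fall back to the raw string if parsing fails.
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        return {"raw_response": raw}

if __name__ == "__main__":
    print(run_inference("sample.wav"))

The defensive json.loads matters here because ChatCompletion responses are free-form strings even when the prompt requests JSON, so downstream steps should not assume the output always parses.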