Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -2,15 +2,12 @@ import os
 import spaces
 import torch
 import gradio as gr
-from openai import OpenAI
 from transformers import pipeline
 
 MODEL_NAME = "openai/whisper-large-v3"
 BATCH_SIZE = 8
 FILE_LIMIT_MB = 1000
 
-client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
-
 device = 0 if torch.cuda.is_available() else "cpu"
 
 pipe = pipeline(
@@ -20,36 +17,18 @@ pipe = pipeline(
     device=device,
 )
 
-@spaces.GPU
-def respond_to_question(transcript, question):
-    # Optionally, use OpenAI API to generate a response to the user's question
-    # based on the transcript
-    response = ""
-    # Replace this with your OpenAI API key
-    response = client.completions.create(
-        engine="gpt-4o-mini",
-        prompt=f"Transcript: {transcript}\n\nUser: {question}\n\nAI:",
-        temperature=0.3,
-        max_tokens=60,
-        top_p=1,
-        frequency_penalty=0,
-        presence_penalty=0
-    ).choices[0].text
-
-    return response
-
 @spaces.GPU
 def respond_to_question_llama(transcript, question):
     from huggingface_hub import InferenceClient
 
     client = InferenceClient(
-        "meta-llama/Meta-Llama-3.1-
+        "meta-llama/Meta-Llama-3.1-70B-Instruct",
         token=os.environ["HUGGINGFACEHUB_API_TOKEN"],
     )
 
     response = client.chat_completion(
         messages=[{"role": "user", "content": f"Transcript: {transcript}\n\nUser: {question}"}],
-        max_tokens=
+        max_tokens=150,
     ).choices[0].message.content
 
     return response