Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -2,15 +2,12 @@ import os
 import spaces
 import torch
 import gradio as gr
-from openai import OpenAI
 from transformers import pipeline
 
 MODEL_NAME = "openai/whisper-large-v3"
 BATCH_SIZE = 8
 FILE_LIMIT_MB = 1000
 
-client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
-
 device = 0 if torch.cuda.is_available() else "cpu"
 
 pipe = pipeline(
@@ -20,36 +17,18 @@ pipe = pipeline(
     device=device,
 )
 
-@spaces.GPU
-def respond_to_question(transcript, question):
-    # Optionally, use OpenAI API to generate a response to the user's question
-    # based on the transcript
-    response = ""
-    # Replace this with your OpenAI API key
-    response = client.completions.create(
-        engine="gpt-4o-mini",
-        prompt=f"Transcript: {transcript}\n\nUser: {question}\n\nAI:",
-        temperature=0.3,
-        max_tokens=60,
-        top_p=1,
-        frequency_penalty=0,
-        presence_penalty=0
-    ).choices[0].text
-
-    return response
-
 @spaces.GPU
 def respond_to_question_llama(transcript, question):
     from huggingface_hub import InferenceClient
 
     client = InferenceClient(
-        "meta-llama/Meta-Llama-3.1-
+        "meta-llama/Meta-Llama-3.1-70B-Instruct",
         token=os.environ["HUGGINGFACEHUB_API_TOKEN"],
     )
 
     response = client.chat_completion(
         messages=[{"role": "user", "content": f"Transcript: {transcript}\n\nUser: {question}"}],
-        max_tokens=
+        max_tokens=150,
     ).choices[0].message.content
 
     return response