avfranco committed
Commit 7ebcec2
Parent: 787c0bf

Update app.py

Files changed (1): app.py (+2 -23)
app.py CHANGED
@@ -2,15 +2,12 @@ import os
  import spaces
  import torch
  import gradio as gr
- from openai import OpenAI
  from transformers import pipeline
 
  MODEL_NAME = "openai/whisper-large-v3"
  BATCH_SIZE = 8
  FILE_LIMIT_MB = 1000
 
- client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
-
  device = 0 if torch.cuda.is_available() else "cpu"
 
  pipe = pipeline(
@@ -20,36 +17,18 @@ pipe = pipeline(
      device=device,
  )
 
- @spaces.GPU
- def respond_to_question(transcript, question):
-     # Optionally, use OpenAI API to generate a response to the user's question
-     # based on the transcript
-     response = ""
-     # Replace this with your OpenAI API key
-     response = client.completions.create(
-         engine="gpt-4o-mini",
-         prompt=f"Transcript: {transcript}\n\nUser: {question}\n\nAI:",
-         temperature=0.3,
-         max_tokens=60,
-         top_p=1,
-         frequency_penalty=0,
-         presence_penalty=0
-     ).choices[0].text
-
-     return response
-
  @spaces.GPU
  def respond_to_question_llama(transcript, question):
      from huggingface_hub import InferenceClient
 
      client = InferenceClient(
-         "meta-llama/Meta-Llama-3.1-8B-Instruct",
+         "meta-llama/Meta-Llama-3.1-70B-Instruct",
          token=os.environ["HUGGINGFACEHUB_API_TOKEN"],
      )
 
      response = client.chat_completion(
          messages=[{"role": "user", "content": f"Transcript: {transcript}\n\nUser: {question}"}],
-         max_tokens=500,
+         max_tokens=150,
      ).choices[0].message.content
 
      return response
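
For orientation, the hunk headers elide the arguments of the pipeline(...) call. The sketch below is a hedged reconstruction of a typical Whisper ASR setup around the context lines the diff does show; only MODEL_NAME, BATCH_SIZE, the device computation, and device=device are confirmed by the commit, while the task and chunk_length_s arguments are assumptions.

# Hedged sketch of the elided pipeline(...) construction. Only MODEL_NAME,
# BATCH_SIZE, the device computation, and device=device are confirmed by the
# diff; task and chunk_length_s are assumed from common Whisper Space setups.
import torch
from transformers import pipeline

MODEL_NAME = "openai/whisper-large-v3"
BATCH_SIZE = 8

device = 0 if torch.cuda.is_available() else "cpu"

pipe = pipeline(
    task="automatic-speech-recognition",  # assumed, elided by the hunk header
    model=MODEL_NAME,
    chunk_length_s=30,  # assumed long-form chunking window
    device=device,
)

# Typical invocation: transcribe a local file, batching chunks for throughput.
result = pipe("sample.wav", batch_size=BATCH_SIZE)  # "sample.wav" is a placeholder
print(result["text"])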
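
The functional change the commit keeps is small: the Q&A helper now targets Llama 3.1 70B Instruct and caps answers at 150 tokens. A minimal standalone sketch of that call path, runnable outside the Space, follows; it assumes huggingface_hub is installed, HUGGINGFACEHUB_API_TOKEN is set, and access to the gated meta-llama repository has been granted. The transcript and question strings are made-up placeholders.

# Minimal sketch of the updated respond_to_question_llama call path, run
# outside the Space (the remote inference call needs no @spaces.GPU).
# Assumes HUGGINGFACEHUB_API_TOKEN is set and gated-repo access is granted.
import os

from huggingface_hub import InferenceClient

client = InferenceClient(
    "meta-llama/Meta-Llama-3.1-70B-Instruct",
    token=os.environ["HUGGINGFACEHUB_API_TOKEN"],
)

transcript = "Speaker: Revenue grew twelve percent quarter over quarter."  # placeholder
question = "How much did revenue grow?"  # placeholder

# Same message shape as the committed code; max_tokens=150 (down from 500)
# bounds answer length, latency, and cost per question.
response = client.chat_completion(
    messages=[{"role": "user", "content": f"Transcript: {transcript}\n\nUser: {question}"}],
    max_tokens=150,
).choices[0].message.content

print(response)

Trading the 8B model for 70B generally buys answer quality at the cost of slower, pricier serverless inference, which the tighter max_tokens partly offsets.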