vumichien committed on
Commit
dfa0f16
•
1 Parent(s): ca8a6b4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -15
app.py CHANGED
@@ -10,6 +10,7 @@ audio_model = WhisperModel("base", compute_type="int8", device="cpu")
10
  text_model = SentenceTransformer('all-MiniLM-L6-v2')
11
  corpus_embeddings = torch.load('corpus_embeddings.pt')
12
  model_type = "whisper"
 
13
 
14
  def speech_to_text(upload_audio):
15
  """
@@ -29,7 +30,7 @@ def voice_detect(audio, recongnize_text=""):
29
  """
30
  Transcribe audio using whisper model.
31
  """
32
- # time.sleep(1)
33
  if len(recongnize_text) !=0:
34
  count_state = int(recongnize_text[0])
35
  recongnize_text = recongnize_text[1:]
@@ -39,7 +40,7 @@ def voice_detect(audio, recongnize_text=""):
39
  threshold = 0.8
40
  detect_greeting = 0
41
  text = speech_to_text(audio)
42
- if text == "ご視聴ありがとうございました" or text == "ご視聴ありがとうございました。":
43
  text = ""
44
  recongnize_text = recongnize_text + " " + text
45
  query_embedding = text_model.encode(text, convert_to_tensor=True)
@@ -55,18 +56,25 @@ def voice_detect(audio, recongnize_text=""):
55
  recongnize_state = str(count_state + detect_greeting) + recongnize_text
56
  return recongnize_text, recongnize_state, count_state
57
 
58
- demo = gr.Interface(
59
- title= "Greeting detection demo app 🙇",
60
- fn=voice_detect,
61
- inputs=[
62
- gr.Audio(source="microphone", type="filepath", streaming=True),
63
- "state",
64
- ],
65
- outputs=[
66
- gr.Textbox(label="Predicted"),
67
- "state",
68
- gr.Number(label="Greeting count"),
69
- ],
70
- live=True)
 
 
 
 
 
 
 
71
 
72
  demo.launch(debug=True)
 
10
  text_model = SentenceTransformer('all-MiniLM-L6-v2')
11
  corpus_embeddings = torch.load('corpus_embeddings.pt')
12
  model_type = "whisper"
13
+ title= "Greeting detection demo app"
14
 
15
  def speech_to_text(upload_audio):
16
  """
 
30
  """
31
  Transcribe audio using whisper model.
32
  """
33
+ # time.sleep(2)
34
  if len(recongnize_text) !=0:
35
  count_state = int(recongnize_text[0])
36
  recongnize_text = recongnize_text[1:]
 
40
  threshold = 0.8
41
  detect_greeting = 0
42
  text = speech_to_text(audio)
43
+ if "ご視聴ありがとうございました" in text:
44
  text = ""
45
  recongnize_text = recongnize_text + " " + text
46
  query_embedding = text_model.encode(text, convert_to_tensor=True)
 
56
  recongnize_state = str(count_state + detect_greeting) + recongnize_text
57
  return recongnize_text, recongnize_state, count_state
58
 
59
+ def clear():
60
+ return None, None, None
61
+
62
+ demo = gr.Blocks(title=title)
63
+
64
+ with demo:
65
+ gr.Markdown('''
66
+ <div>
67
+ <h1 style='text-align: center'>挨拶カウンター</h1>
68
+ </div>
69
+ ''')
70
+ with gr.Row():
71
+ with gr.Column():
72
+ audio_source = gr.Audio(source="microphone", type="filepath", streaming=True)
73
+ state = gr.State(value="")
74
+ with gr.Column():
75
+ greeting_count = gr.Number(label="挨拶回数")
76
+ with gr.Row():
77
+ text_output = gr.Textbox(label="認識されたテキスト")
78
+ audio_source.stream(voice_detect, inputs=[audio_source, state], outputs=[text_output, state, greeting_count])
79
 
80
  demo.launch(debug=True)