matteocirca committed on
Commit
8a0a2d0
·
1 Parent(s): e8aa27e

Update app

Browse files
Files changed (1) hide show
  1. app.py +43 -31
app.py CHANGED
@@ -1,45 +1,57 @@
1
- from transformers import pipeline
2
  import gradio as gr
3
- from transformers import WhisperForConditionalGeneration
 
4
 
5
- pipe = pipeline(model="matteocirca/whisper-small-it")
6
- # pipe = pipeline(model="openai/whisper-small")
7
- # model = WhisperForConditionalGeneration.from_pretrained("matteocirca/whisper-small-it")
8
-
9
- segments = []
10
 
11
  def audio2segments(audio,word):
12
- global segments
13
- if not segments:
14
- print(pipe(audio).keys())
15
- # segments = pipe(audio)["segments"]
16
- # segments = model.transcribe(audio)["segments"]
17
- elif not word:
18
- return "No word detected"
19
- else:
 
 
 
 
 
 
20
  ranges = []
21
- for s in segments:
22
- if word in s['text'].replace(',',' , ').split(" "):
23
- ranges.append((s['start'],s['end']))
24
- res = ""
25
- for i,r in enumerate(ranges):
26
- res += f"{i}) {r[0]}-{r[1]}\n "
27
- return res
28
-
29
-
30
- def find_segment():
31
- global segments
32
- res = ""
33
-
 
 
 
 
 
 
34
 
35
  iface = gr.Interface(
36
  fn=audio2segments,
37
- inputs=[gr.Audio(sources=["microphone"], type="filepath"),"text"],
38
- outputs="text",
39
  title="Whisper Small Italian",
40
  description="Realtime demo for Italian speech recognition using a fine-tuned Whisper small model.",
41
  )
42
 
43
 
44
  iface.launch()
45
-
 
1
+ from transformers import pipeline, WhisperModel
2
  import gradio as gr
3
+ import pandas as pd
4
+ import string
5
 
6
# ASR pipeline: Whisper-small fine-tuned on Italian. Word-level timestamps
# are requested so individual word occurrences can be located in the audio.
pipe = pipeline(model="matteocirca/whisper-small-it", return_timestamps="word")
# Alternatives kept for experimentation:
# model = WhisperModel.from_pretrained("matteocirca/whisper-small-it")
# pipe = pipeline(model="openai/whisper-small", return_timestamps="word")

# Cache of the most recent transcription: `current_audio` is the file path of
# the last processed clip, `segments` the pipeline output for it.
current_audio = None
segments = {}
11
 
12
def audio2segments(audio, word):
    """Transcribe *audio* and list the time ranges where *word* occurs.

    Parameters
    ----------
    audio : str | None
        Path to the audio file (Gradio ``type="filepath"``).
    word : str
        Word to search for in the transcription. Matching is done against
        each word chunk lowercased with punctuation and spaces stripped,
        so *word* should be lowercase to match (same as the original code).

    Returns
    -------
    tuple[str, str]
        ``(full transcription text, HTML table of occurrences)``.
    """
    global segments, current_audio

    # Transcription is expensive: re-run the pipeline only when the audio
    # file actually changed since the last call.
    if current_audio is None or audio != current_audio:
        segments = pipe(audio)
        current_audio = audio

    if not word:
        # No search word: still show the transcription (if we have one).
        text = segments["text"] if current_audio is not None else ""
        return text, "<html><h1>No Word inserted!</h1></html>"

    # Collect the (start, end) timestamps of every word chunk matching `word`.
    occurrences = []
    for chunk in segments["chunks"]:
        token = (
            chunk["text"]
            .translate(str.maketrans("", "", string.punctuation))
            .replace(" ", "")
            .lower()
        )
        if token == word:
            occurrences.append(chunk["timestamp"])

    res = "<table><thead><tr><th>Occurrence n°</th><th>Start</th><th>End</th></tr></thead><tbody>"
    for i, (start, end) in enumerate(occurrences):
        res += f"<tr><td>{i}</td><td>{start}</td><td>{end}</td></tr>"
    res += " </tbody></table>"
    return segments["text"], res
43
+
44
def clear():
    """Reset the cached transcription so the next call re-runs the pipeline.

    Bug fix: the original assignment lacked a ``global`` declaration, so it
    only created a local variable and the module-level cache was never
    cleared. ``current_audio`` must be reset together with ``segments`` —
    otherwise replaying the same file would skip re-transcription and serve
    the now-empty ``segments`` cache.
    """
    global segments, current_audio
    segments = {}
    current_audio = None
46
+
47
 
48
# Gradio UI: an audio clip (uploaded or recorded) plus a target word go in;
# the full transcription and an HTML table of word occurrences come out.
demo_inputs = [gr.Audio(sources=["upload", "microphone"], type="filepath"), "text"]
demo_outputs = ["text", "html"]

iface = gr.Interface(
    fn=audio2segments,
    inputs=demo_inputs,
    outputs=demo_outputs,
    title="Whisper Small Italian",
    description="Realtime demo for Italian speech recognition using a fine-tuned Whisper small model.",
)

iface.launch()