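# Streamlit app: record audio in the browser, transcribe it with Whisper, then
# extract lead-related entities from the transcript with GLiNER.
# Assumed entry point -- if this file is saved as app.py, run it with: streamlit run app.py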
import streamlit as st
from st_audiorec import st_audiorec
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
#from datasets import load_dataset
import torch
from gliner import GLiNER
from resources import Lead_Labels, entity_labels, set_start, audit_elapsedtime
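# Note: `resources` is a local module (not shown here); `set_start` and
# `audit_elapsedtime` are assumed to be simple timing helpers that capture a
# start timestamp and log the elapsed time for the named step.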

def main():
    """Render the recorder UI, transcribe the recording, and extract entities."""
    print("------------------------------")
    print("Running main")
    rec = init_model_trans()
    ner = init_model_ner()  # async
    labels = entity_labels
    # text = "I have a proposal from cgd where they want one outsystems junior developers and one senior for an estimate of three hundred euros a day, for six months."
    # print(f"get entities from sample text: {text}")
    # get_entity_labels(model=ner, text=text, labels=labels)

    print("Rendering UI...")
    start_render = set_start()
    wav_audio_data = st_audiorec()
    audit_elapsedtime(function="Rendering UI", start=start_render)

    if wav_audio_data is not None and rec is not None:
        print("Loading data...")
        start_loading = set_start()
        st.audio(wav_audio_data, format='audio/wav')
        text = transcribe(wav_audio_data, rec)

        if text is not None:
            get_entity_labels(labels=labels, model=ner, text=text)
        audit_elapsedtime(function="Loading data", start=start_loading)

def init_model_trans():
    """Load the Whisper speech-to-text model and wrap it in a transformers pipeline."""
    print("Initiating transcription model...")
    start = set_start()

    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

    model_id = "openai/whisper-large-v3"
    model = AutoModelForSpeechSeq2Seq.from_pretrained(
        model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
    )
    model.to(device)

    processor = AutoProcessor.from_pretrained(model_id)

    pipe = pipeline(
        "automatic-speech-recognition",
        model=model,
        tokenizer=processor.tokenizer,
        feature_extractor=processor.feature_extractor,
        max_new_tokens=128,
        chunk_length_s=30,
        batch_size=16,
        return_timestamps=True,
        torch_dtype=torch_dtype,
        device=device,
    )
    print('Init model successful')
    audit_elapsedtime(function="Initiating transcription model", start=start)
    return pipe

def init_model_ner():
    """Load the GLiNER multilingual named-entity-recognition model."""
    print("Initiating NER model...")
    start = set_start()
    model = GLiNER.from_pretrained("urchade/gliner_multi")
    audit_elapsedtime(function="Initiating NER model", start=start)
    return model

def transcribe(audio_sample: bytes, pipe) -> str:
    """Run the ASR pipeline on the recorded audio and show the transcript in the UI."""
    print("Initiating transcription...")
    start = set_start()
    # dataset = load_dataset("distil-whisper/librispeech_long", "clean", split="validation")
    # sample = dataset[0]["audio"]

    result = pipe(audio_sample)
    audit_elapsedtime(function="Transcription", start=start)

    print(result)
    st.write('Transcription: ', result["text"])
    return result["text"]

def get_entity_labels(model: GLiNER, text: str, labels: list):  # -> Lead_Labels:
    """Extract the requested entity labels from the transcript and show them in the UI."""
    print("Initiating entity recognition...")
    start = set_start()
    entities = model.predict_entities(text, labels)
    audit_elapsedtime(function="Retrieving entity labels from text", start=start)

    for entity in entities:
        print(entity["text"], "=>", entity["label"])
    st.write('Entities: ', entities)
    # return Lead_Labels()

if __name__ == "__main__":
    print("IN __name__")
    main()