mango1 / app.py
on1onmangoes's picture
Update app.py
b38a0bf
raw
history blame
1.46 kB
import os
import tempfile
import time as t

import streamlit as st
from pydub import AudioSegment, silence
from transformers import pipeline
#import speech_recognition as sr
#pipe = pipeline('sentiment-analysis')
#text = st.text_area('Enter your notes')
#if text:
# out = pipe(text)
# st.json(out)
# Page header. The style attribute has to be quoted and use the real CSS
# property name (`text-align`) for the heading to actually be centered.
st.markdown(
    "<h1 style='text-align:center;'> Group Therapy Notes </h1>",
    unsafe_allow_html=True,
)
st.markdown("---", unsafe_allow_html=True)

audio = st.file_uploader("Upload Your Audio File", type=['mp3', 'wav', 'm4a'])
if audio:
    # NOTE(review): the pipeline is rebuilt each time this branch runs;
    # consider caching it if the installed Streamlit version offers a
    # resource cache -- confirm against the deployed version.
    pipe = pipeline(
        'automatic-speech-recognition',
        model="facebook/wav2vec2-base-960h",
    )

    # Decode whatever container was uploaded (mp3 / wav / m4a) with pydub.
    audio_segment = AudioSegment.from_file(audio)

    # Write the WAV into a per-run temporary directory rather than a fixed
    # "audio.wav" in the working directory, so simultaneous uploads cannot
    # overwrite each other and nothing is left behind afterwards.
    with tempfile.TemporaryDirectory() as work_dir:
        wav_path = os.path.join(work_dir, "audio.wav")
        # export() hands back the open output file; close it so the data is
        # flushed and the directory can be removed cleanly.
        audio_segment.export(wav_path, format="wav").close()

        # Chunked inference: 10 s windows with a 4 s left / 2 s right stride.
        output = pipe(wav_path, chunk_length_s=10, stride_length_s=(4, 2))

    st.json(output)
# stride_length_s is a tuple of the left and right stride lengths.
# With only one number, both sides get the same stride. By default,
# the stride length on each side is 1/6th of chunk_length_s.
# chunk.export(str(index)+".wav", format="wav")
# audio_segment= AudioSegment.from_file(audio)
# chunks=silence.split_on_silence(audio_segment, min_silence_len=500, silence_thresh= audio_segment.dBFS-20,keep_silence=100)
# for index, chunk in enumerate (chunks):
# #output = pipe(audio_segment, chunk_length_s=10, stride_length_s=(4, 2))
# print (chunk)
# st.json("wav")