"""Streamlit app that compares two recordings via speech transcription.

Both recordings are transcribed with a Hugging Face hosted wav2vec2 model and
the two transcriptions are compared with a normalized Levenshtein similarity.
"""

from io import BytesIO

import Levenshtein
import requests
import streamlit as st
from audio_recorder_streamlit import audio_recorder

HF_API_URL = (
    "https://api-inference.huggingface.co/models/"
    "jonatasgrosman/wav2vec2-large-xlsr-53-arabic"
)
# Without a timeout, requests waits indefinitely and the spinner never ends.
HF_REQUEST_TIMEOUT_SECONDS = 60
# Scores strictly above this value are reported as a likely-correct pronunciation.
SIMILARITY_THRESHOLD = 0.8
RECORDING_HINT = (
    "Click the button below to start recording. "
    "The button will turn green when recording is active."
)


class TranscriptionError(RuntimeError):
    """Raised when the transcription service cannot produce a transcription."""


# Function to securely load the Hugging Face API token
@st.cache_resource
def load_hf_token():
    """Return the Hugging Face API token stored in Streamlit secrets.

    Raises:
        KeyError: If ``HF_API_KEY`` is not present in ``st.secrets``.
    """
    return st.secrets["HF_API_KEY"]


# Function to query the Hugging Face Inference API
def transcribe_audio_hf(audio_bytes: bytes) -> str:
    """Transcribe speech from audio data using the Hugging Face Inference API.

    Args:
        audio_bytes (bytes): Audio data in bytes.

    Returns:
        str: The transcription, stripped of surrounding whitespace. Empty when
        the service answers successfully but reports no text.

    Raises:
        TranscriptionError: If the request fails, times out, returns a
            non-success status, or returns a body that is not the expected
            JSON object.
    """
    headers = {"Authorization": f"Bearer {load_hf_token()}"}
    try:
        response = requests.post(
            HF_API_URL,
            headers=headers,
            data=audio_bytes,
            timeout=HF_REQUEST_TIMEOUT_SECONDS,
        )
    except requests.RequestException as exc:
        raise TranscriptionError(
            f"Could not reach the transcription service: {exc}"
        ) from exc

    try:
        payload = response.json()
    except ValueError as exc:
        raise TranscriptionError(
            f"Transcription service returned a non-JSON response "
            f"(HTTP {response.status_code})."
        ) from exc

    # Failures must not be silently turned into an empty transcription, so
    # anything other than a successful JSON object without "error" is rejected.
    if not response.ok or not isinstance(payload, dict) or "error" in payload:
        detail = payload.get("error") if isinstance(payload, dict) else payload
        raise TranscriptionError(
            f"Transcription service error (HTTP {response.status_code}): {detail}"
        )

    return (payload.get("text") or "").strip()


def levenshtein_similarity(transcription1: str, transcription2: str) -> float:
    """Calculate the Levenshtein similarity between two transcriptions.

    Args:
        transcription1 (str): The first transcription.
        transcription2 (str): The second transcription.

    Returns:
        float: A normalized similarity score between 0 and 1, where 1
        indicates identical transcriptions. Two empty strings are identical
        and therefore score 1.0.
    """
    max_len = max(len(transcription1), len(transcription2))
    if max_len == 0:
        # Both inputs are empty: avoid dividing by zero.
        return 1.0
    distance = Levenshtein.distance(transcription1, transcription2)
    return 1 - distance / max_len  # Normalize to get similarity score


def evaluate_audio_similarity(original_audio_bytes: bytes, user_audio_bytes: bytes):
    """Transcribe both recordings and score how similar the transcriptions are.

    Args:
        original_audio_bytes (bytes): Bytes of the original audio file.
        user_audio_bytes (bytes): Bytes of the user's audio file.

    Returns:
        tuple: ``(transcription_original, transcription_user, similarity)``
        where ``similarity`` is the normalized Levenshtein similarity.

    Raises:
        TranscriptionError: If either transcription request fails.
    """
    transcription_original = transcribe_audio_hf(original_audio_bytes)
    transcription_user = transcribe_audio_hf(user_audio_bytes)
    similarity_score_levenshtein = levenshtein_similarity(
        transcription_original, transcription_user
    )
    return transcription_original, transcription_user, similarity_score_levenshtein


def _upload_audio(label: str):
    """Show a file uploader plus playback; return the bytes or None."""
    uploaded = st.file_uploader(label, type=["wav", "mp3"])
    if not uploaded:
        return None
    # getvalue() returns the full content regardless of the stream position,
    # so the bytes stay available on the rerun triggered by the button.
    audio_bytes = uploaded.getvalue()
    # Use the uploaded file's own MIME type so MP3 files are not labelled WAV.
    st.audio(audio_bytes, format=uploaded.type or "audio/wav")
    return audio_bytes


def _record_audio(role: str, key: str):
    """Show a recorder for ``role`` ("Original"/"User"); return the bytes or None."""
    st.write(f"Record or Upload {role} Audio")
    st.write(RECORDING_HINT)
    audio_bytes = audio_recorder(key=key)
    if audio_bytes:
        with st.spinner(f"Processing {role.lower()} audio..."):
            st.audio(audio_bytes, format="audio/wav")
            st.success(f"{role} audio recorded successfully!")
    else:
        st.warning(
            f"No {role.lower()} audio recorded. "
            "Please record or upload an audio file."
        )
    return audio_bytes


def _run_and_show_results(original_audio_bytes: bytes, user_audio_bytes: bytes) -> None:
    """Run the comparison and render transcriptions, score and verdict."""
    with st.spinner("Performing transcription and similarity testing..."):
        try:
            transcription_original, transcription_user, similarity_score = (
                evaluate_audio_similarity(original_audio_bytes, user_audio_bytes)
            )
        except TranscriptionError as exc:
            st.error(f"Transcription failed: {exc}")
            return

    # Display results
    st.markdown("---")
    st.subheader("Transcriptions and Similarity Score")
    st.write(f"**Original Transcription:** {transcription_original}")
    st.write(f"**User Transcription:** {transcription_user}")
    st.write(f"**Levenshtein Similarity Score:** {similarity_score:.2f}")

    if not transcription_original or not transcription_user:
        # An empty transcription carries no pronunciation information; two
        # empty ones would otherwise score 1.0 and be reported as correct.
        st.warning(
            "At least one transcription is empty, so the pronunciation "
            "cannot be judged."
        )
        return

    if similarity_score > SIMILARITY_THRESHOLD:  # Adjust the threshold as needed
        st.success(
            "The pronunciation is likely correct based on transcription similarity."
        )
    else:
        st.error(
            "The pronunciation may be incorrect based on transcription similarity."
        )


st.title("Audio Transcription and Similarity Checker")

# Choose between upload or record
st.sidebar.header("Input Method")
input_method = st.sidebar.selectbox("Choose Input Method", ["Upload", "Record"])

original_audio_bytes = None
user_audio_bytes = None

if input_method == "Upload":
    original_audio_bytes = _upload_audio("Upload Original Audio")
    user_audio_bytes = _upload_audio("Upload User Audio")
elif input_method == "Record":
    original_audio_bytes = _record_audio("Original", "original_audio_recorder")
    user_audio_bytes = _record_audio("User", "user_audio_recorder")

# Add a button to perform the test once both recordings are available
if original_audio_bytes and user_audio_bytes and st.button("Perform Testing"):
    _run_and_show_results(original_audio_bytes, user_audio_bytes)