Spaces:

Subbu1304
/

voice_project

Runtime error

App Files Files Community

Subbu1304 committed on Feb 1

Commit

aca48d5

verified ·

1 Parent(s): 9e60904

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -61

app.py CHANGED Viewed

@@ -1,61 +1,37 @@
-import speech_recognition as sr
-import pyttsx3
-import json
-import sounddevice as sd
-import numpy as np
-# Hard-coded username -> password table kept in plaintext (replace with a database if needed).
-USER_CREDENTIALS = {
-    "john_doe": "hello123",
-    "alice_smith": "welcome456"
-}
-# Module-level text-to-speech engine shared by speak(); its 'rate' property is set to 150.
-engine = pyttsx3.init()
-engine.setProperty('rate', 150)
-def speak(text):
-    """Queue `text` on the module-level pyttsx3 engine and block until runAndWait() returns."""
-    engine.say(text)
-    engine.runAndWait()
-def recognize_speech():
-    """Prompt aloud, capture one microphone utterance and return it lower-cased (None on failure)."""
-    listener = sr.Recognizer()
-    with sr.Microphone() as mic:
-        speak("Please say your username and password.")
-        print("Listening...")
-        listener.adjust_for_ambient_noise(mic)
-        # Only the capture/recognition call is guarded; the success path continues below.
-        try:
-            heard = listener.recognize_google(listener.listen(mic))
-        except sr.UnknownValueError:
-            speak("Sorry, I didn't catch that. Please try again.")
-            return None
-        except sr.RequestError:
-            speak("Speech service is unavailable. Please check your internet connection.")
-            return None
-        print(f"Recognized: {heard}")
-        return heard.lower()
-def authenticate():
-    """Return True when one utterance contains both a known username and its password, else False."""
-    spoken = recognize_speech()
-    if not spoken:
-        return False
-    # First credential pair whose username and password both occur as substrings of the utterance.
-    matched_user = next((u for u, pw in USER_CREDENTIALS.items() if u in spoken and pw in spoken), None)
-    if matched_user is None:
-        speak("Authentication failed. Please try again.")
-        return False
-    speak(f"Welcome, {matched_user}. You are now logged in.")
-    return True
-if __name__ == "__main__":
-    # Script entry point: greet the user, run one login attempt, report the outcome.
-    speak("Welcome to the voice login system.")
-    logged_in = authenticate()
-    outcome = "Login Successful!" if logged_in else "Login Failed!"
-    print(outcome)

+import gradio as gr
+from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
+import torch
+import soundfile as sf
+# Runs at import time: load the Wav2Vec2 processor and CTC model. NOTE(review): confirm this checkpoint ships a tokenizer.
+processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-xlsr-53")
+model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-xlsr-53")
+def transcribe_audio(audio):
+    """Transcribe one uploaded audio file with the module-level Wav2Vec2 `processor`/`model`.
+
+    `audio` is the upload whose `.name` is the file path (a plain path string is also
+    accepted); returns the greedily decoded text of that single clip.
+    """
+    audio_input, sample_rate = sf.read(getattr(audio, "name", audio))
+    if audio_input.ndim > 1:  # soundfile yields (frames, channels); downmix to mono
+        audio_input = audio_input.mean(axis=1)
+    # Pass the file's real rate so a mismatch with the model's rate is not silently ignored.
+    inputs = processor(audio_input, sampling_rate=sample_rate, return_tensors="pt")
+    with torch.no_grad():  # inference only: no autograd graph needed
+        logits = model(inputs.input_values).logits
+    predicted_ids = torch.argmax(logits, dim=-1)  # most likely token per frame
+    return processor.batch_decode(predicted_ids)[0]
+# Build the upload-only Gradio UI around transcribe_audio, then start serving it.
+iface = gr.Interface(
+    fn=transcribe_audio, inputs=gr.Audio(source="upload", type="file"), outputs="text",
+    title="Voice Login System",
+    description="Upload an audio file for transcription using Wav2Vec2 model.",
+)
+iface.launch()