Subbu1304 committed on
Commit
aca48d5
·
verified ·
1 Parent(s): 9e60904

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -61
app.py CHANGED
@@ -1,61 +1,37 @@
1
- import speech_recognition as sr
2
- import pyttsx3
3
- import json
4
- import sounddevice as sd
5
- import numpy as np
6
-
7
# Demo credential table: username -> spoken passphrase.
# (Swap for a real user database if this grows beyond a demo.)
USER_CREDENTIALS = {
    "john_doe": "hello123",
    "alice_smith": "welcome456",
}

# Shared text-to-speech engine used by speak(); speaking rate set to 150.
engine = pyttsx3.init()
engine.setProperty("rate", 150)
16
-
17
def speak(text):
    """Say *text* aloud through the module-level ``engine``.

    Args:
        text: The sentence to vocalise.
    """
    # Queue the utterance, then let the engine process its queue.
    engine.say(text)
    engine.runAndWait()
21
-
22
def recognize_speech():
    """Prompt the user, record one utterance from the microphone and transcribe it.

    Returns:
        The recognised text in lower case, or ``None`` when the audio could
        not be understood or the recognition request failed (a spoken error
        message is played in both cases).
    """
    listener = sr.Recognizer()
    with sr.Microphone() as mic:
        speak("Please say your username and password.")
        print("Listening...")
        listener.adjust_for_ambient_noise(mic)
        try:
            captured = listener.listen(mic)
            transcript = listener.recognize_google(captured)
        except sr.UnknownValueError:
            speak("Sorry, I didn't catch that. Please try again.")
            return None
        except sr.RequestError:
            speak("Speech service is unavailable. Please check your internet connection.")
            return None
        else:
            print(f"Recognized: {transcript}")
            return transcript.lower()
40
-
41
def _alnum_only(value):
    """Return *value* case-folded with everything but letters and digits removed."""
    return "".join(ch for ch in value.casefold() if ch.isalnum())


def authenticate():
    """Authenticate the user from a single spoken phrase.

    The phrase returned by ``recognize_speech()`` is searched for any
    username/password pair from ``USER_CREDENTIALS``.

    Returns:
        ``True`` when both a username and its password are found in the
        phrase (a spoken welcome is played), ``False`` otherwise.
    """
    speech_text = recognize_speech()
    if not speech_text:
        return False

    # The stored usernames contain underscores and the passwords fuse letters
    # and digits ("john_doe", "hello123"); a raw substring test against a
    # speech transcript would not match those forms. Compare on letters and
    # digits only so "john doe hello 123" matches.
    spoken = _alnum_only(speech_text)
    for username, password in USER_CREDENTIALS.items():
        user_key = _alnum_only(username)
        pass_key = _alnum_only(password)
        # Skip entries that normalise to "" - an empty string is a substring
        # of everything and would authenticate anyone.
        if not user_key or not pass_key:
            continue
        if user_key in spoken and pass_key in spoken:
            speak(f"Welcome, {username}. You are now logged in.")
            return True

    speak("Authentication failed. Please try again.")
    return False
55
-
56
if __name__ == "__main__":
    # Script entry point: greet, run one login attempt, report the outcome.
    speak("Welcome to the voice login system.")
    print("Login Successful!" if authenticate() else "Login Failed!")
 
1
+ import gradio as gr
2
+ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
3
+ import torch
4
+ import soundfile as sf
5
+
6
# Load the processor and model from Hugging Face.
#
# NOTE: "facebook/wav2vec2-large-xlsr-53" is a pre-training-only checkpoint -
# it provides no tokenizer/vocabulary and no trained CTC head, so it cannot be
# loaded as a Wav2Vec2Processor or produce meaningful transcriptions. Use a
# checkpoint that has been fine-tuned for speech recognition instead.
MODEL_ID = "facebook/wav2vec2-base-960h"
processor = Wav2Vec2Processor.from_pretrained(MODEL_ID)
model = Wav2Vec2ForCTC.from_pretrained(MODEL_ID)
9
+
10
def transcribe_audio(audio):
    """Transcribe an uploaded audio file with the module-level Wav2Vec2 model.

    Args:
        audio: The value Gradio passes for the audio input: an object whose
            ``.name`` attribute is the file path, a plain path string, or
            ``None`` when nothing was uploaded.

    Returns:
        The decoded transcription, or an empty string when no audio (or an
        empty file) was supplied.
    """
    if audio is None:
        return ""

    # Accept both a temp-file wrapper (has .name) and a bare path string.
    audio_path = getattr(audio, "name", audio)
    samples, sample_rate = sf.read(audio_path, dtype="float32")
    waveform = torch.from_numpy(samples)

    # Multi-channel files come back as (frames, channels); the model expects
    # a single mono waveform.
    if waveform.ndim > 1:
        waveform = waveform.mean(dim=1)
    if waveform.numel() == 0:
        return ""

    # The feature extractor is configured for one specific sample rate;
    # feeding audio at any other rate yields garbage transcriptions.
    target_rate = processor.feature_extractor.sampling_rate
    if sample_rate != target_rate:
        new_length = max(1, round(waveform.numel() * target_rate / sample_rate))
        # Simple linear resampling using torch only (no extra dependency).
        waveform = torch.nn.functional.interpolate(
            waveform.reshape(1, 1, -1),
            size=new_length,
            mode="linear",
            align_corners=False,
        ).reshape(-1)

    input_values = processor(
        waveform.numpy(), sampling_rate=target_rate, return_tensors="pt"
    ).input_values

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        logits = model(input_values).logits

    # Greedy CTC decoding: most likely token per frame, then collapse to text.
    predicted_ids = torch.argmax(logits, dim=-1)
    return processor.batch_decode(predicted_ids)[0]
29
+
30
# Gradio front end: an upload-only audio input wired to transcribe_audio,
# with the transcription shown as plain text.
# NOTE(review): the source=/type="file" keywords look like an older Gradio
# API - confirm they are accepted by the installed Gradio version.
audio_upload = gr.Audio(source="upload", type="file")
iface = gr.Interface(
    fn=transcribe_audio,
    inputs=audio_upload,
    outputs="text",
    title="Voice Login System",
    description="Upload an audio file for transcription using Wav2Vec2 model.",
)

iface.launch()