# Voice chat demo: ASR (Whisper) -> text generation (SmolLM2) -> TTS, wired into a Gradio UI.
import gradio as gr
import torch
from transformers import pipeline
import time
import logging

logging.basicConfig(level=logging.INFO)
# Load the three pipelines: Whisper for speech recognition, SmolLM2 for text generation,
# and a Swahili female voice for text-to-speech.
asr_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-small", chunk_length_s=30)
# do_sample=True is needed for temperature/top_p to have any effect during generation.
text_pipe = pipeline("text-generation", model="HuggingFaceTB/SmolLM2-360M", max_length=512, temperature=0.7, top_p=0.9, do_sample=True)
tts_pipe = pipeline("text-to-speech", model="mussacharles60/swahili-tts-female-voice")

MAX_INPUT_SIZE = 100
PREDEFINED_ATTRIBUTES = ["name", "age", "location"]
CONTEXT_HISTORY = []


def recognize_speech(audio):
    """Transcribe audio with Whisper, retrying up to three times on transient failures."""
    retries = 3
    for _ in range(retries):
        try:
            result = asr_pipe(audio, return_timestamps=True)
            return result['text']
        except Exception as e:
            logging.error(f"ASR failed: {e}")
            time.sleep(1)
    return ""


def generate_text(prompt):
    """Generate a reply, keeping a rolling window of the last five prompts as context."""
    global CONTEXT_HISTORY
    CONTEXT_HISTORY.append(prompt)
    if len(CONTEXT_HISTORY) > 5:
        CONTEXT_HISTORY.pop(0)
    context = " ".join(CONTEXT_HISTORY)
    # return_full_text=False strips the echoed context so only the newly generated reply is returned.
    outputs = text_pipe(context, max_length=512, num_return_sequences=1, return_full_text=False)
    generated_text = outputs[0]['generated_text']
    return generated_text


def synthesize_speech(text):
    """Synthesize speech from text."""
    # The text-to-speech pipeline returns a dict with 'audio' and 'sampling_rate';
    # gr.Audio expects a (sample_rate, waveform) tuple, so unpack and flatten to 1-D.
    audio = tts_pipe(text)
    return audio["sampling_rate"], audio["audio"].squeeze()


def handle_conversation(audio):
    """Run one full turn: transcribe the input, generate a reply, and speak it."""
    recognized_text = recognize_speech(audio)
    if any(attr in recognized_text.lower() for attr in PREDEFINED_ATTRIBUTES):
        generated_text = generate_text(f"Please provide your {recognized_text}")
    else:
        generated_text = generate_text(recognized_text)
    synthesized_audio = synthesize_speech(generated_text)
    return synthesized_audio, generated_text


# Build the UI; components and event wiring must be created inside the Blocks context.
with gr.Blocks() as demo:
    # type="filepath" hands the recording to the ASR pipeline as a file path it can decode.
    input_audio = gr.Audio(label="Input Audio", type="filepath")
    output_audio = gr.Audio(label="Output Audio")
    output_text = gr.Textbox(label="Output Text")

    conversation_button = gr.Button("Start Conversation")
    conversation_button.click(handle_conversation, inputs=input_audio, outputs=[output_audio, output_text])

demo.launch()