# Speech_grammar / app.py
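"""
Gradio app that checks grammar from speech or text.

Pipeline: record audio -> transcribe it with the speech_recognition library
(Google Web Speech API) -> run the transcript (or typed text) through the
"prithivida/grammar_error_correcter_v1" T5 model twice (once via transformers,
once via HappyTransformer with beam search) -> return both corrections plus a
rough length-based score.
"""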
import torch
import gradio as gr
import speech_recognition as sr
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from happytransformer import HappyTextToText, TTSettings  # HappyTransformer wrapper for T5 text-to-text models


# Load models only once for efficiency
def load_models():
    model_name = "prithivida/grammar_error_correcter_v1"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    happy_tt = HappyTextToText("T5", model_name)  # Same checkpoint, loaded through HappyTransformer
    return tokenizer, model, happy_tt


tokenizer, model, happy_tt = load_models()  # Load models at startup
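
# Note: the Seq2Seq model and the HappyTransformer wrapper hold the same checkpoint,
# so the weights are kept in memory twice; each produces its own correction below.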


# Speech-to-text conversion
def transcribe_audio(audio):
    """Transcribe a recorded audio file to text with the Google Web Speech API."""
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio) as source:
        audio_data = recognizer.record(source)  # Read the whole recording into memory
    try:
        text = recognizer.recognize_google(audio_data)  # Requires an internet connection
        return text
    except sr.UnknownValueError:
        return "Could not understand the audio."
    except sr.RequestError as e:
        return f"Speech recognition error: {e}"


# Grammar correction function
def correct_grammar(text):
    if not text or not text.strip():
        return "No input provided.", 0, "No correction available."

    # Correction 1: plain generation with the Seq2Seq model
    inputs = tokenizer.encode("gec: " + text, return_tensors="pt", max_length=128, truncation=True)
    with torch.no_grad():
        outputs = model.generate(inputs, max_length=128, num_return_sequences=1)
    corrected_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Correction 2: beam-search generation through HappyTransformer
    args = TTSettings(num_beams=5, min_length=1)
    correction = happy_tt.generate_text("gec: " + text, args=args).text

    # Rough score: 100 minus the character-length difference between input and correction, floored at 0
    grammar_score = max(0, 100 - abs(len(text) - len(correction)))
    return corrected_text, grammar_score, correction
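
# Example of the scoring heuristic above (hypothetical input, for illustration):
#   text       = "she go to school"     -> 16 characters
#   correction = "She goes to school."  -> 19 characters
#   grammar_score = max(0, 100 - |16 - 19|) = 97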


# Unified function for both speech and text input
def process_input(audio, text):
    if audio:
        text = transcribe_audio(audio)  # If audio is provided, transcribe it and ignore the textbox
    return correct_grammar(text)


# Gradio UI
def main():
    iface = gr.Interface(
        fn=process_input,
        inputs=[
            gr.Audio(sources=["microphone"], type="filepath", label="Speak your sentence"),
            gr.Textbox(placeholder="Or type here if not speaking...", label="Text Input"),
        ],
        outputs=[
            gr.Textbox(label="Corrected Text"),
            gr.Number(label="Grammar Score"),
            gr.Textbox(label="Alternative Correction"),
        ],
        title="AI Grammar Checker",
        description="Speak or type a sentence to check its grammar, get corrections, and see a score.",
        live=False,  # Only process when the user submits
        api_name="/predict",
    )
    iface.launch()


if __name__ == "__main__":
    main()
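
# Minimal, commented-out sketch of how a client could call the /predict endpoint above
# via gradio_client. The Space id "gaur3009/Speech_grammar" and the sample sentence are
# assumptions; adjust them to the actual deployment. Passing None for the audio input
# sends only the typed text.
#
# from gradio_client import Client
#
# client = Client("gaur3009/Speech_grammar")  # hypothetical Space id
# corrected, score, alt_correction = client.predict(
#     None,                  # no audio; use the text input instead
#     "she go to school",    # sample sentence (assumption)
#     api_name="/predict",
# )
# print(corrected, score, alt_correction)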