Spaces:
Sleeping
Sleeping
from transformers import pipeline | |
from lang_trans.arabic import buckwalter | |
from difflib import SequenceMatcher | |
import pandas as pd | |
import gradio as gr | |
import time | |
# Gradio application for learning Noorani Qaida | |
# Arabic speech-recognition pipeline, created once at import time and used by transcribe()
p = pipeline(
    "automatic-speech-recognition",
    model="jonatasgrosman/wav2vec2-large-xlsr-53-arabic",
)
# Read one sheet of an Excel file and store its rows in a list of tuples
def read_excel_data(file_path, sheet_name):
    """Load the test definitions from one sheet of an Excel workbook.

    Args:
        file_path: Path of the workbook, passed to ``pandas.read_excel``.
        sheet_name: Name of the sheet to read.

    Returns:
        A list with one ``(test, correct, close, wrong, image)`` tuple per
        row, read from the columns of the same names. An empty list is
        returned when the workbook cannot be read or a column is missing,
        so callers can always iterate the result.
    """
    columns = ("test", "correct", "close", "wrong", "image")
    try:
        df = pd.read_excel(file_path, sheet_name=sheet_name)
        return [tuple(row[column] for column in columns) for _, row in df.iterrows()]
    except Exception as e:
        # Deliberately best-effort: report the problem and hand back an empty
        # list instead of None, which would break every caller that loops
        # over the result.
        print(f"Error reading Excel file: {e}")
        return []
# Workbook and sheet that hold the sample texts to read
excel_file_path = "ASR_live_test.xlsx"
sheet_name = 'sample_test(threshold=75%)'
# Guard against a falsy result (e.g. None after a failed load) so the rest of
# the module can always iterate the list.
full_test_list = read_excel_data(excel_file_path, sheet_name) or []
# Similarity function | |
def similar(a, b):
    """Return the difflib similarity ratio of *a* and *b* (a float in [0, 1])."""
    matcher = SequenceMatcher(None, a, b)
    return matcher.ratio()
def _score_from_ratio(ratio):
    """Map a similarity ratio to the label "correct", "close" or "wrong"."""
    if ratio > 0.75:
        return "correct"
    # Everything in (0.50, 0.75] is "close"; previously a ratio of exactly
    # 0.75 matched neither comparison and was scored "wrong".
    if ratio > 0.50:
        return "close"
    return "wrong"


def transcribe(audio, reference_text):
    """Transcribe a recording and score it against the reference text.

    Args:
        audio: Value handed to the speech-recognition pipeline ``p`` (the
            Audio components in this file use ``type="filepath"``). ``None``
            means nothing was recorded.
        reference_text: Text the transcription is compared with.

    Returns:
        A ``(transcription, score)`` pair, where ``score`` is "correct",
        "close" or "wrong". ``("", "")`` is returned when there is no audio
        or no transcription, so both output components always get a value.
    """
    if audio is None:
        return "", ""
    # NOTE(review): the reason for this one-second pause is not visible in
    # this file - presumably it lets the recording finish being written;
    # confirm before removing.
    time.sleep(1)
    text = p(audio)["text"]
    text = buckwalter.untrans(text)
    if text is None:
        print(" Null Object")
        return "", ""
    # Compute the ratio once; a missing reference compares as empty text.
    ratio = similar(reference_text or "", text)
    return text, _score_from_ratio(ratio)
# Function to retrieve the list of unique categories | |
def get_unique_tests(data_list):
    """Return the distinct test names of *data_list* in first-seen order.

    Args:
        data_list: Iterable of rows whose first element is the test name.

    Returns:
        A new list holding each test name once. The previous comprehension
        filtered against a list that was still empty while it ran, so
        duplicates were never dropped.
    """
    # dict.fromkeys keeps insertion order while discarding repeated keys.
    return list(dict.fromkeys(row[0] for row in data_list))
# Function to retrieve the correct, close, wrong and image values for a given test
def get_values_image_for_test(tests, test):
    """Look up the row of *tests* whose first field equals *test*.

    Returns the ``(correct, close, wrong, image)`` tuple of the first
    matching row, or ``None`` when no row matches.
    """
    matching_rows = (
        (correct, close, wrong, image)
        for name, correct, close, wrong, image in tests
        if name == test
    )
    return next(matching_rows, None)
# Counting completed tests | |
def completed_tests(test):
    """Record *test* as completed and report the running totals.

    The module-level ``completed_tests_list`` is created if it is ``None``
    and receives *test* when it is truthy and not already present.

    Returns:
        ``(number_of_completed_tests, completed_tests_list)``.
    """
    global completed_tests_list
    if completed_tests_list is None:
        completed_tests_list = []
    already_recorded = test in completed_tests_list
    if test and not already_recorded:
        completed_tests_list.append(test)
    return len(completed_tests_list), completed_tests_list
# Tests distribution over users | |
import random | |
def assign_tests(num_tests):
    """Hand out up to *num_tests* tests that have not been assigned yet.

    The unique test names of ``full_test_list`` are shuffled, the first
    *num_tests* of them that are still in the module-level
    ``remaining_tests`` pool are returned, and those are removed from the
    pool. When the pool is empty (first call, or everything was handed out)
    it is refilled with all tests before selecting.

    Args:
        num_tests: Maximum number of tests to assign. Negative values are
            treated as 0 rather than slicing from the end of the list.

    Returns:
        A list of assigned test names; it is shorter than *num_tests* when
        fewer tests remain, and empty when there are no tests at all.
    """
    global remaining_tests
    tests = get_unique_tests(full_test_list)
    random.shuffle(tests)
    # Start a new round once the pool is exhausted.
    if len(remaining_tests) == 0:
        remaining_tests = set(tests)
    # Still empty after refilling: there are no tests to hand out.
    if len(remaining_tests) == 0:
        print("***** Tests Completed *****")
        return []
    limit = max(num_tests, 0)
    assigned_tests = [test for test in tests if test in remaining_tests][:limit]
    remaining_tests -= set(assigned_tests)
    return assigned_tests
# Tracking user progress | |
def test_progress(test):
    """Mark *test* as completed and build the progress texts for the UI.

    Returns:
        ``(progress_text, completed_tests_text)`` where the second item is
        the completed test names joined by newlines. When no assigned test
        is left the shared completed list is cleared after the text has been
        built.
    """
    global completed_tests_list
    global len_assigned_test
    done_count, completed_tests_list = completed_tests(test)
    # Build the display text first: the list may be cleared below.
    completed_tests_text = "\n".join(completed_tests_list)
    left_count = len_assigned_test - done_count
    if left_count != 0:
        progress_text = f"Completed tests {done_count} . Remaining {left_count}"
        return progress_text, completed_tests_text
    completed_tests_list.clear()
    return "Congratulations! You have completed all tests", completed_tests_text
# Authentication function | |
def update_message(request: gr.Request):
    """Build the greeting for the user attached to *request*."""
    username = request.username
    return f"Welcome, {username}"
# Gradio apps | |
# Get test categories | |
data_list = full_test_list
image_path = "all_imgs/"
# Shared progress state, read and updated by completed_tests() / test_progress()
completed_tests_list = []
# Pool of tests not handed out yet; a set, because assign_tests() uses set operations on it
remaining_tests = set()
# Assign a batch of tests. This runs once when the module is loaded, so the
# same batch is shown to every session.
num_tests = 10
assigned_tests = assign_tests(num_tests)
len_assigned_test = len(assigned_tests)
print(assigned_tests)
# Flagging
callback = gr.CSVLogger()
# CSS | |
# Custom stylesheet handed to gr.Blocks. The last rule targets the element
# with id "progress_text", so it needs an id selector ("#...").
css = """
h1 {
    text-align: center;
    color: #5756BB;
    display:block;
}
p {
    text-align: left;
    display:block;
}
p.thick {
    font-weight: bold;
}
.drop_color {background-color: #e0eaff}
img {
    height: auto;
    width: auto;
    margin-left: auto;
    margin-right: auto;
    display: block;
}
.row {
    height: 90px;
    width: auto;
}
#progress_text {
    text-align: center;
    font-weight: bold;
}
"""
# JavaScript helper that colours the selected dropdown option green.
# NOTE(review): this string is not passed to gr.Blocks anywhere in the code
# visible here, and whether the element with id "test_color" exposes
# `options` / `selectedIndex` depends on how Gradio renders the dropdown -
# confirm before wiring it in.
js = """
function change_color(){
    const dropdown = document.getElementById("test_color");
    const selectedtest = dropdown.options[dropdown.selectedIndex];
    selectedtest.style.color = "green";
}
"""
# Gradio UI: a column with progress and test selection, and a column with the
# test image, three read-aloud steps and a save button.
# NOTE(review): the nesting of rows/columns was reconstructed from a copy of
# the file that had lost its indentation - confirm the layout.
demo = gr.Blocks(theme=gr.themes.Soft(), css=css)
with demo:
    # App title
    gr.Markdown("# Noorani Qaida Test Interface")

    # User authentication: greeting (filled in on page load) and logout button
    with gr.Row():
        m = gr.Markdown()
        logout_button = gr.Button("Logout", link="/logout", scale=0, variant="primary")
    demo.load(update_message, None, m)

    # App description
    gr.Markdown(
        "Select a category then a test from the list, read the text aloud, "
        "get the transcription and save it."
    )

    def get_test_text(test):
        """Return the three step texts and the image for the selected test.

        Unknown or empty selections blank the four outputs instead of
        failing while unpacking a missing row.
        """
        values = get_values_image_for_test(data_list or [], test)
        if values is None:
            return None, None, None, None
        correct, close, wrong, image = values
        return (
            gr.Textbox(label="Step 1", value=correct),
            gr.Textbox(label="Step 2", value=close),
            gr.Textbox(label="Step 3", value=wrong),
            gr.Image(value=f"{image_path}{image}.jpg"),
        )

    def update_completed_test(test):
        """Return a TextArea update listing the completed tests.

        Not wired to any event in this block. The result is stored under a
        different local name so the module-level ``completed_tests``
        function stays callable here.
        """
        _, completed = completed_tests(test)
        return gr.TextArea(value="\n".join(completed))

    def _build_step_column(step_label, step_info):
        """Create one step column and return (text, audio, transcription, score)."""
        with gr.Column():
            text = gr.Textbox(label=step_label, info=step_info, interactive=False, rtl=True)
            audio = gr.Audio(sources=["microphone"], type="filepath", label="Record Audio", show_label=False)
            transcription = gr.Textbox(info="Transcription", interactive=False, rtl=True, show_label=False)
            # Hidden: the score is only stored with the flagged data
            score = gr.Textbox(label="Score", visible=False)
            audio.stop_recording(fn=transcribe, inputs=[audio, text], outputs=[transcription, score])
        return text, audio, transcription, score

    with gr.Row():
        # First big column: progress and test selection
        with gr.Column(scale=1):
            progress_text = gr.Textbox(
                value=f"Completed tests 0. Remaining {len_assigned_test}",
                interactive=False, show_label=False, container=True,
                elem_id="progress_text", elem_classes="drop_color",
            )
            textarea_completed = gr.TextArea(
                info=" Your completed tests will appear here ",
                interactive=False, rtl=True, container=True, show_label=False,
                elem_classes="drop_color",
            )
            tests_list = assigned_tests
            test_dropdown = gr.Dropdown(
                tests_list, label="Tests", info="Select a test", scale=1,
                elem_id="test_color", elem_classes="drop_color",
            )
            test_dropdown.select(test_progress, test_dropdown, outputs=[progress_text, textarea_completed])

        # Second big column: image, steps and save button
        with gr.Column(scale=3):
            # First row for image
            with gr.Row():
                image = gr.Image(
                    elem_id="img", elem_classes="row", container=False, show_label=True,
                    show_download_button=False, show_share_button=False,
                )
            # Second row for the three steps
            with gr.Row():
                correct_text, correct_audio_record, correct_trans_text, correct_score = \
                    _build_step_column("Step 1", "Correct Text")
                close_text, close_audio_record, close_trans_text, close_score = \
                    _build_step_column("Step 2", "Close Text")
                wrong_text, wrong_audio_record, wrong_trans_text, wrong_score = \
                    _build_step_column("Step 3", "Wrong Text")

            # Row for flag
            with gr.Row():
                flagged_components = [
                    correct_text, correct_trans_text, correct_score, correct_audio_record,
                    close_text, close_trans_text, close_score, close_audio_record,
                    wrong_text, wrong_trans_text, wrong_score, wrong_audio_record,
                ]

                def save_outputs(correct_text, correct_trans_text, correct_score, correct_audio_record,
                                 close_text, close_trans_text, close_score, close_audio_record,
                                 wrong_text, wrong_trans_text, wrong_score, wrong_audio_record,
                                 request: gr.Request = None):
                    """Write the three steps to the flagging log.

                    A warning is shown and nothing is written unless all
                    three transcriptions are present. The row is logged
                    under the username carried by the request.
                    """
                    transcriptions = (correct_trans_text, close_trans_text, wrong_trans_text)
                    # `not text` also covers None, which len() would reject
                    if any(not text for text in transcriptions):
                        gr.Warning("Please complete the test before saving")
                        return
                    username = request.username if request is not None else None
                    # Call flag() directly; returning a lambda only handed
                    # back an uncalled function, so no row was ever written.
                    callback.flag(
                        [correct_text, correct_trans_text, correct_score, correct_audio_record,
                         close_text, close_trans_text, close_score, close_audio_record,
                         wrong_text, wrong_trans_text, wrong_score, wrong_audio_record],
                        username=username,
                    )

                flag_btn = gr.Button(value="Save this test", variant="primary", scale=2)
                # Setup the components to flag
                callback.setup(flagged_components, "flagged_data")
                # preprocess=False hands the raw component values to the logger
                flag_btn.click(save_outputs, flagged_components, None, preprocess=False)

    # Update test values according to the selected test
    test_dropdown.input(get_test_text, test_dropdown, [correct_text, close_text, wrong_text, image])
    # Clear recordings and transcriptions after selecting a new test. The
    # step texts and image are left to get_test_text, so the two handlers
    # never write the same component and their order does not matter.
    test_dropdown.select(
        lambda: [None] * 6, None,
        outputs=[correct_audio_record, close_audio_record, wrong_audio_record,
                 correct_trans_text, close_trans_text, wrong_trans_text],
        queue=False,
    )
#demo.launch(inbrowser=True) | |
if __name__ == "__main__":
    # NOTE(review): every account uses its username as password and the app is
    # launched with share=True - consider loading credentials from
    # configuration instead of the source file.
    usernames = [
        "randa", "randa1", "randa2", "randa3",
        "karim1", "karim2", "karim3",
        "yassir1", "yassir2", "yassir3",
        "mehdi",
    ]
    credentials = [(name, name) for name in usernames]
    demo.launch(auth=credentials, share=True, inbrowser=True)