# Streamlit app (Hugging Face Space): speech-to-ARASAAC-pictogram translation.
# Standard library
import tempfile

# Third-party
import numpy as np
import pandas as pd
import streamlit as st
import whisper
from fpdf import FPDF
from st_audiorec import st_audiorec
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
# --- Page configuration ---
st.set_page_config(
    page_title="Traduction de la parole en pictogrammes ARASAAC",
    page_icon="📝",
    layout="wide",
)

# --- Model loading ---
# Cached with st.cache_resource so Streamlit reruns (every widget interaction
# re-executes this script) do not reload the model weights from disk/network.
checkpoint = "Propicto/t2p-nllb-200-distilled-600M-all"


@st.cache_resource
def _load_translation_model():
    """Load the text-to-pictogram seq2seq model and its tokenizer."""
    return (
        AutoTokenizer.from_pretrained(checkpoint),
        AutoModelForSeq2SeqLM.from_pretrained(checkpoint),
    )


@st.cache_resource
def _load_whisper_model():
    """Load the Whisper ASR model used for transcription."""
    return whisper.load_model("base")


tokenizer, model = _load_translation_model()
whisper_model = _load_whisper_model()
# Lexicon loading
def read_lexicon(lexicon):
    """Read the ARASAAC lexicon (tab-separated file) into a DataFrame.

    Adds a 'keyword_no_cat' column: the lemma with its ' #category' suffix
    removed, surrounding whitespace stripped, and inner spaces replaced by
    underscores so it matches the tokens emitted by the translation model.

    :param lexicon: path (or file-like) of the tab-separated lexicon file,
        expected to contain at least 'lemma' and 'id_picto' columns.
    :return: the lexicon DataFrame with the extra 'keyword_no_cat' column.
    """
    df = pd.read_csv(lexicon, sep='\t')
    df['keyword_no_cat'] = (
        df['lemma'].str.split(' #').str[0].str.strip().str.replace(' ', '_')
    )
    return df
# Load the pictogram lexicon shipped alongside the app.
lexicon = read_lexicon("lexicon.csv")
# Translation post-processing
def process_output_trad(pred):
    """Split the decoded model prediction into individual lemma tokens.

    :param pred: whitespace-separated prediction string from the model.
    :return: list of lemma tokens (empty list for an empty prediction).
    """
    return pred.split()
def get_id_picto_from_predicted_lemma(df_lexicon, lemma):
    """Map a predicted lemma to an ARASAAC pictogram id.

    A trailing '!' (punctuation glued to the lemma by the model) is stripped
    before lookup. The first matching 'id_picto' wins.

    :param df_lexicon: lexicon DataFrame with 'keyword_no_cat' and 'id_picto'.
    :param lemma: predicted lemma token.
    :return: (pictogram_id, cleaned_lemma); (0, cleaned_lemma) when the lemma
        is not in the lexicon (0 is the sentinel for "no pictogram").
    """
    if lemma.endswith("!"):
        lemma = lemma[:-1]
    matches = df_lexicon.loc[df_lexicon['keyword_no_cat'] == lemma, 'id_picto'].tolist()
    return (matches[0], lemma) if matches else (0, lemma)
# HTML rendering of the pictogram sequence
def generate_html(ids):
    """Build an HTML page with one <figure> (image + caption) per pictogram.

    :param ids: sequence of (picto_id, lemma) pairs; pairs with id 0
        (lemma not found in the lexicon) are skipped.
    :return: complete HTML document as a string.
    """
    import html as _html  # local import: only needed for attribute escaping

    css = '''
    figure {
        display: inline-block;
        text-align: center;
        font-family: Arial, sans-serif;
        margin: 0;
    }
    figcaption {
        color: black;
        background-color: white;
        border-radius: 5px;
    }
    img {
        background-color: white;
        margin: 0;
        padding: 0;
        border-radius: 6px;
    }
    '''
    parts = ['<html><head><style>', css, '</style></head><body>']
    for picto_id, lemma in ids:
        if picto_id == 0:  # sentinel: no pictogram found for this lemma
            continue
        # Escape the lemma: it is interpolated into an attribute and a
        # caption, and may contain quotes/angle brackets.
        safe_lemma = _html.escape(lemma, quote=True)
        img_url = f"https://static.arasaac.org/pictograms/{picto_id}/{picto_id}_500.png"
        parts.append(f'''
        <figure>
            <img src="{img_url}" alt="{safe_lemma}" width="200" height="200"/>
            <figcaption>{safe_lemma}</figcaption>
        </figure>
        ''')
    parts.append('</body></html>')
    return ''.join(parts)
# PDF export
def generate_pdf(ids):
    """Render the pictogram sequence to 'pictograms.pdf' (A4 landscape).

    Each (picto_id, lemma) pair becomes an image fetched from the ARASAAC
    static server with the lemma printed underneath; pairs with id 0 are
    skipped. Images flow left to right and wrap to a new row when the next
    image would overflow the page width.

    :param ids: sequence of (picto_id, lemma) pairs.
    :return: path of the written PDF file.
    """
    pdf = FPDF(orientation='L', unit='mm', format='A4')  # 'L' = landscape
    pdf.add_page()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.set_font("Arial", size=12)  # hoisted: the font never changes per cell

    # Layout constants (mm)
    x_start, y_start = 10, 10
    img_width, img_height = 50, 50
    spacing = 1
    max_width = 297  # A4 landscape page width in mm

    current_x, current_y = x_start, y_start
    for picto_id, lemma in ids:
        if picto_id == 0:  # sentinel: no pictogram for this lemma
            continue
        # NOTE(review): the image is fetched straight from the URL, so this
        # needs network access; classic FPDF core fonts are latin-1 only, so
        # lemmas with characters outside latin-1 may raise — TODO confirm
        # which fpdf package (pyfpdf vs fpdf2) is installed.
        img_url = f"https://static.arasaac.org/pictograms/{picto_id}/{picto_id}_500.png"
        pdf.image(img_url, x=current_x, y=current_y, w=img_width, h=img_height)
        pdf.set_xy(current_x, current_y + img_height + 5)
        pdf.cell(img_width, 10, txt=lemma, ln=1, align='C')
        current_x += img_width + spacing
        # Wrap to the next row once another image would overflow the page.
        if current_x + img_width > max_width:
            current_x = x_start
            current_y += img_height + spacing + 10  # image height + caption room
    pdf_path = "pictograms.pdf"
    pdf.output(pdf_path)
    return pdf_path
# Initialize session state so results survive Streamlit reruns.
for _key in ('transcription', 'pictogram_ids', 'previous_audio_file'):
    if _key not in st.session_state:
        st.session_state[_key] = None
# --- Main UI: upload audio, transcribe, translate to pictograms ---
st.title("Traduction de la parole en pictogrammes ARASAAC")

col1, col2 = st.columns(2)

with col1:
    audio_file = st.file_uploader("Ajouter un fichier audio :", type=["wav", "mp3"])
    # Reset cached results when the uploaded file changes.
    if audio_file is not None and audio_file != st.session_state['previous_audio_file']:
        st.session_state['transcription'] = None
        st.session_state['pictogram_ids'] = None
        st.session_state['previous_audio_file'] = audio_file

with col2:
    if audio_file is not None:
        # Transcribe only once per uploaded file: the result is cached in
        # session state, so Streamlit reruns do not re-run Whisper.
        if st.session_state['transcription'] is None:
            with st.spinner("Transcription de l'audio en cours..."):
                # Whisper needs a real file on disk, so buffer the upload.
                # TODO(review): the temp file is never deleted afterwards.
                with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
                    temp_file.write(audio_file.read())
                    temp_file_path = temp_file.name
                result = whisper_model.transcribe(temp_file_path, language='fr')
                st.session_state['transcription'] = result['text']
        st.text_area("Transcription :", st.session_state['transcription'])

with st.spinner("Affichage des pictogrammes..."):
    # Translate only when a transcription exists and no result is cached yet.
    # Caching matters: generation samples (do_sample=True), so recomputing on
    # every rerun would show a different pictogram sequence each time.
    if (st.session_state['transcription'] is not None
            and st.session_state['pictogram_ids'] is None):
        inputs = tokenizer(st.session_state['transcription'].lower(),
                           return_tensors="pt").input_ids
        outputs = model.generate(inputs, max_new_tokens=40, do_sample=True,
                                 top_k=30, top_p=0.95)
        pred = tokenizer.decode(outputs[0], skip_special_tokens=True)
        st.session_state['pictogram_ids'] = [
            get_id_picto_from_predicted_lemma(lexicon, lemma)
            for lemma in process_output_trad(pred)
        ]

if st.session_state['pictogram_ids'] is not None:
    html = generate_html(st.session_state['pictogram_ids'])
    st.components.v1.html(html, height=500, scrolling=True)
    # Offer the same sequence as a downloadable PDF.
    pdf_path = generate_pdf(st.session_state['pictogram_ids'])
    with open(pdf_path, "rb") as pdf_file:
        st.download_button(label="Télécharger la traduction en PDF", data=pdf_file,
                           file_name="pictograms.pdf", mime="application/pdf")
# In-browser recording (not yet wired up):
# record_audio = st_audiorec()
# if record_audio:
#     audio = np.array(record_audio)
#     transcription = whisper_model.transcribe(audio, language='fr')
#     st.success("Enregistrement terminé !")