# Hugging Face file-view header (page residue, kept as a comment so the file parses):
# cecilemacaire · "Update app.py" · commit 550ce4a (verified) · 6.37 kB
import streamlit as st
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import pandas as pd
from fpdf import FPDF
import whisper
import tempfile
from st_audiorec import st_audiorec
import numpy as np
# User interface: browser-tab title, icon and page layout
_page_settings = {
    "page_title": "Traduction de la parole en pictogrammes ARASAAC",
    "page_icon": "📝",
    "layout": "wide",
}
st.set_page_config(**_page_settings)
# Load the translation model/tokenizer and the Whisper model.
# Streamlit re-executes this script on every user interaction, so the loaders
# are wrapped in st.cache_resource to avoid reloading the weights on each rerun.
checkpoint = "Propicto/t2p-nllb-200-distilled-600M-all"


@st.cache_resource
def _load_translation_model(name):
    """Return the ``(tokenizer, model)`` pair for the seq2seq checkpoint *name*."""
    return (
        AutoTokenizer.from_pretrained(name),
        AutoModelForSeq2SeqLM.from_pretrained(name),
    )


@st.cache_resource
def _load_whisper_model(size):
    """Return the Whisper speech-recognition model of the given *size*."""
    return whisper.load_model(size)


tokenizer, model = _load_translation_model(checkpoint)
whisper_model = _load_whisper_model("base")
# Read the lexicon
@st.cache_data
def read_lexicon(lexicon):
    """Load the tab-separated lexicon and add a normalised lookup column.

    Args:
        lexicon: Path (or file-like object) handed to ``pandas.read_csv``.
            The table must contain a ``lemma`` column.

    Returns:
        The DataFrame with an extra ``keyword_no_cat`` column: the part of
        ``lemma`` before ``' #'``, stripped, with spaces replaced by
        underscores.
    """
    table = pd.read_csv(lexicon, sep='\t')
    base_lemma = table['lemma'].str.split(' #').str[0]
    table['keyword_no_cat'] = base_lemma.str.strip().str.replace(' ', '_')
    return table


lexicon = read_lexicon("lexicon.csv")
# Post-process the translation output
def process_output_trad(pred):
    """Split the predicted sentence *pred* on whitespace into a list of lemmas."""
    lemmas = pred.split()
    return lemmas
def get_id_picto_from_predicted_lemma(df_lexicon, lemma):
    """Look up the pictogram id of a predicted lemma.

    Args:
        df_lexicon: DataFrame with ``keyword_no_cat`` and ``id_picto`` columns.
        lemma: Predicted lemma; one trailing ``!`` is removed before lookup.

    Returns:
        ``(id_picto, lemma)`` using the first matching row, or ``(0, lemma)``
        when the lemma is not in the lexicon. The returned lemma is the
        cleaned one.
    """
    cleaned = lemma[:-1] if lemma.endswith("!") else lemma
    is_match = df_lexicon['keyword_no_cat'] == cleaned
    matches = df_lexicon.loc[is_match, 'id_picto'].tolist()
    if not matches:
        return (0, cleaned)
    return (matches[0], cleaned)
# Build the HTML page that displays the pictograms
def generate_html(ids):
    """Return an HTML document showing one captioned pictogram per entry.

    Args:
        ids: Iterable of ``(picto_id, lemma)`` pairs. Pairs whose
            ``picto_id`` is ``0`` (no pictogram found) are skipped.

    Returns:
        A complete ``<html>`` document as a string. Each pictogram is a
        ``<figure>`` holding the ARASAAC image and the lemma as caption.
        Lemmas are HTML-escaped before being inserted.
    """
    # Imported locally so the module-level name `html` stays free for the script.
    from html import escape

    style = '''
    figure {
        display: inline-block;
        text-align: center;
        font-family: Arial, sans-serif;
        margin: 0;
    }
    figcaption {
        color: black;
        background-color: white;
        border-radius: 5px;
    }
    img {
        background-color: white;
        margin: 0;
        padding: 0;
        border-radius: 6px;
    }
    '''

    figures = []
    for picto_id, lemma in ids:
        if picto_id == 0:  # ignore invalid IDs
            continue
        img_url = f"https://static.arasaac.org/pictograms/{picto_id}/{picto_id}_500.png"
        # Escape the caption: it is placed both in an attribute and in element text.
        label = escape(str(lemma))
        figures.append(f'''
        <figure>
            <img src="{img_url}" alt="{label}" width="200" height="200"/>
            <figcaption>{label}</figcaption>
        </figure>
        ''')

    return ''.join([
        '<html><head><style>',
        style,
        '</style></head><body>',
        *figures,
        '</body></html>',
    ])
# PDF generation
def generate_pdf(ids):
    """Write the pictograms and their captions to a landscape A4 PDF.

    Pictograms are laid out left to right in rows. A new row starts when the
    next image would cross the page width, and a new page starts when the
    next row (image plus caption) would cross the bottom margin.

    Args:
        ids: Iterable of ``(picto_id, lemma)`` pairs. Pairs whose
            ``picto_id`` is ``0`` are skipped.

    Returns:
        The path of the written file (``"pictograms.pdf"``).
    """
    page_height = 210  # A4 landscape height in mm
    bottom_margin = 15
    pdf = FPDF(orientation='L', unit='mm', format='A4')  # 'L' for landscape orientation
    pdf.add_page()
    pdf.set_auto_page_break(auto=True, margin=bottom_margin)
    pdf.set_font("Arial", size=12)  # same font for every caption

    # Layout constants (mm)
    x_start = 10
    y_start = 10
    img_width = 50
    img_height = 50
    spacing = 1
    caption_gap = 5
    caption_height = 10
    max_width = 297  # A4 landscape width in mm
    # Full height taken by one row: image, gap, caption cell.
    row_height = img_height + caption_gap + caption_height

    current_x = x_start
    current_y = y_start
    for picto_id, lemma in ids:
        if picto_id == 0:  # ignore invalid IDs
            continue

        # Start a new page ourselves before the caption cell would trigger an
        # automatic page break; otherwise the tracked y position would keep
        # growing and later rows would be drawn below the page.
        if current_y + row_height > page_height - bottom_margin:
            pdf.add_page()
            current_x = x_start
            current_y = y_start

        img_url = f"https://static.arasaac.org/pictograms/{picto_id}/{picto_id}_500.png"
        pdf.image(img_url, x=current_x, y=current_y, w=img_width, h=img_height)

        # The core Arial font only covers Latin-1; replace anything else
        # rather than failing on the whole document.
        caption = str(lemma).encode('latin-1', 'replace').decode('latin-1')
        pdf.set_xy(current_x, current_y + img_height + caption_gap)
        pdf.cell(img_width, caption_height, txt=caption, ln=1, align='C')

        current_x += img_width + spacing
        # Move to the next row if the next image would exceed the page width
        if current_x + img_width > max_width:
            current_x = x_start
            # Advance by the full row height so captions and images of
            # consecutive rows do not overlap.
            current_y += row_height + spacing

    # NOTE(review): fixed file name in the working directory; concurrent
    # sessions would overwrite each other's file — confirm this is acceptable.
    pdf_path = "pictograms.pdf"
    pdf.output(pdf_path)
    return pdf_path
# Initialise the session state: every key starts as None if not yet present
for _state_key in ('transcription', 'pictogram_ids', 'previous_audio_file'):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = None
# User interface: audio upload, transcription, pictogram display and PDF download
def _transcribe_upload(uploaded_file):
    """Transcribe an uploaded audio file with Whisper and return the text.

    The upload is copied to a temporary file because the transcription call
    is given a file path; the temporary file is removed afterwards.
    """
    import os  # local import: only needed for temp-file cleanup

    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
        # getvalue() returns the whole buffer regardless of the read position.
        temp_file.write(uploaded_file.getvalue())
        temp_file_path = temp_file.name
    try:
        return whisper_model.transcribe(temp_file_path, language='fr')['text']
    finally:
        os.remove(temp_file_path)


def _translate_to_pictograms(text):
    """Translate *text* into a list of ``(picto_id, lemma)`` pairs."""
    inputs = tokenizer(text.lower(), return_tensors="pt").input_ids
    outputs = model.generate(inputs, max_new_tokens=40, do_sample=True, top_k=30, top_p=0.95)
    pred = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return [
        get_id_picto_from_predicted_lemma(lexicon, lemma)
        for lemma in process_output_trad(pred)
    ]


st.title("Traduction de la parole en pictogrammes ARASAAC")
col1, col2 = st.columns(2)

with col1:
    audio_file = st.file_uploader("Ajouter un fichier audio :", type=["wav", "mp3"])
    # Reset the stored results when a different audio file is uploaded
    if audio_file is not None and audio_file != st.session_state['previous_audio_file']:
        st.session_state['transcription'] = None
        st.session_state['pictogram_ids'] = None
        st.session_state['previous_audio_file'] = audio_file

# NOTE(review): the transcription and pictogram output are assumed to belong
# in the right-hand column — confirm against the intended layout.
with col2:
    # Transcribe only when no result is stored yet, so that reruns (e.g. after
    # clicking the download button) reuse the stored text.
    if audio_file is not None and st.session_state['transcription'] is None:
        with st.spinner("Transcription de l'audio en cours..."):
            st.session_state['transcription'] = _transcribe_upload(audio_file)

    if st.session_state['transcription'] is not None:
        st.text_area("Transcription :", st.session_state['transcription'])
        # Generation samples tokens, so translating again on every rerun would
        # change the pictograms; translate once and keep the result.
        if st.session_state['pictogram_ids'] is None:
            with st.spinner("Affichage des pictogrammes..."):
                st.session_state['pictogram_ids'] = _translate_to_pictograms(
                    st.session_state['transcription']
                )

    if st.session_state['pictogram_ids'] is not None:
        html = generate_html(st.session_state['pictogram_ids'])
        st.components.v1.html(html, height=500, scrolling=True)
        # Download button for the PDF version of the translation
        pdf_path = generate_pdf(st.session_state['pictogram_ids'])
        with open(pdf_path, "rb") as pdf_file:
            st.download_button(label="Télécharger la traduction en PDF", data=pdf_file, file_name="pictograms.pdf", mime="application/pdf")
# record_audio = st_audiorec()
# if record_audio:
# audio = np.array(record_audio)
# transcription = whisper_model.transcribe(audio, language='fr')
# st.success("Enregistrement terminé !")