# Streamlit page: upload PDF documents, show their audit and ingest them into the assistant.
import os
import tempfile
import streamlit as st
from utils.document import audit_descriptif_pdf
def read_and_save_file():
    """Process the files currently held by the ``file_uploader`` widget.

    Registered as the ``on_change`` callback of the uploader. For each
    uploaded file it:

    1. copies the upload into a temporary file on disk,
    2. records the file name and its audit in ``st.session_state["files"]``
       unless an entry with the same name is already present,
    3. passes the temporary path to ``st.session_state["assistant"].ingest``
       while showing a spinner in the ``ingestion_spinner`` placeholder,
    4. deletes the temporary file.

    The temporary file is removed even when the audit or the ingestion
    raises, so failed uploads do not leave files behind.
    """
    for file in st.session_state["file_uploader"]:
        # ``ingest`` is given a filesystem path, so the in-memory upload is
        # written to disk first. ``delete=False`` keeps the file alive after
        # the ``with`` block closes it; it is removed explicitly below.
        with tempfile.NamedTemporaryFile(delete=False) as tf:
            tf.write(file.getbuffer())
            file_path = tf.name

        try:
            # Only audit a document the first time its name is seen.
            if not any(f["name"] == file.name for f in st.session_state["files"]):
                st.session_state["files"].append({
                    "name": file.name,
                    "audit": audit_descriptif_pdf(file)["audit"],
                })

            # NOTE(review): ingestion is assumed to run for every uploaded
            # file, not only for newly recorded ones - confirm against the
            # intended behaviour.
            with st.session_state["ingestion_spinner"], st.spinner(f"Chargement {file.name}"):
                st.session_state["assistant"].ingest(file_path)
        finally:
            # Always clean up, including when the audit or ingestion fails.
            os.remove(file_path)
def page():
    """Render the document upload page.

    Shows a sub-header and a multi-file PDF uploader whose ``on_change``
    callback is ``read_and_save_file``, then lists every entry of
    ``st.session_state["files"]`` with an HTML table of its audit figures.
    Finally stores an empty placeholder under
    ``st.session_state["ingestion_spinner"]``.
    """
    st.subheader("Charger vos documents")

    # Make sure the list of recorded documents exists before it is read.
    if "files" not in st.session_state:
        st.session_state["files"] = []

    # Disabled: custom CSS to hide default English labels.
    # st.markdown(
    #     """
    #     <style>
    #     div[data-testid="stFileUploaderDropzoneInstructions"] div::before {
    #         content: "Déposer les fichiers ici ou cliquer pour parcourir";
    #         display: block;
    #         font-size: 14px;
    #         color: #333;
    #         text-align: center;
    #         padding: 10px;
    #     }
    #     div[data-testid="stFileUploaderDropzone"] div[role="button"] span {
    #         display: none !important;
    #     }
    #     </style>
    #     """,
    #     unsafe_allow_html=True
    # )

    # Uploader widget; its value is read back through the "file_uploader" key.
    st.file_uploader(
        "Télécharger un ou plusieurs documents",
        type=["pdf"],
        key="file_uploader",
        accept_multiple_files=True,
        on_change=read_and_save_file,
    )

    # Audit fields, in the order of the table rows below.
    audit_keys = (
        'number_of_pages',
        'number_of_images',
        'number_of_links',
        'number_of_tables',
        'number_of_tokens',
        'number_of_words',
    )

    for entry in st.session_state["files"]:
        st.markdown(f"#### {entry['name']}")
        stats = entry["audit"]
        table_html = """
        <table>
        <tr><td>Nombre de pages</td><td>{}</td></tr>
        <tr><td>Nombre d'images</td><td>{}</td></tr>
        <tr><td>Nombre de liens</td><td>{}</td></tr>
        <tr><td>Nombre de tableaux</td><td>{}</td></tr>
        <tr><td>Nombre de tokens</td><td>{}</td></tr>
        <tr><td>Nombre de mots</td><td>{}</td></tr>
        </table>
        """.format(*(stats[key] for key in audit_keys))
        st.markdown(table_html, unsafe_allow_html=True)

    # Placeholder used by read_and_save_file as the spinner container.
    st.session_state["ingestion_spinner"] = st.empty()
# Render the page each time the script is executed.
page()