Spaces:

Nugh75
/

Edurag_beta

Sleeping

App Files Files Community

Nugh75 committed on Dec 31, 2024

Commit

65416ea

1 Parent(s): d3b9bfa

aggiornata interfaccia chatbot

Browse files

bisogna aggiornare gli altri tab

Files changed (19) hide show

.gradio/certificate.pem +31 -0
app.py +222 -92
app/__pycache__/__init__.cpython-310.pyc +0 -0
app/__pycache__/config.cpython-310.pyc +0 -0
app/__pycache__/document_handling.cpython-310.pyc +0 -0
app/__pycache__/llm_handling.cpython-310.pyc +0 -0
app/__pycache__/logging_config.cpython-310.pyc +0 -0
app/config.py +7 -1
app/document_handling.py +212 -49
app/import pytest.py +58 -0
app/llm_handling.py +82 -68
app/llm_handling_3.py +76 -0
app/test_llm_handling.py +30 -0
app_1.py +275 -0
faiss_index_Daniele2/index.faiss +0 -0
faiss_index_E-learning/index.faiss +0 -0
{faiss_index_Daniele2 → faiss_index_E-learning}/index.pkl +2 -2
faiss_index_E-learning/metadata.json +16 -0
requirements.txt +50 -23

.gradio/certificate.pem ADDED Viewed

	@@ -0,0 +1,31 @@

+-----BEGIN CERTIFICATE-----
+MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
+TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
+cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
+WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
+ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
+MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
+h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
+0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
+A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
+T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
+B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
+B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
+KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
+OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
+jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
+qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
+rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
+HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
+hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
+ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
+3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
+NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
+ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
+TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
+jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
+oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
+4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
+mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
+emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
+-----END CERTIFICATE-----

app.py CHANGED Viewed

@@ -1,23 +1,43 @@
 import gradio as gr
-from app.document_handling import *
 from app.llm_handling import answer_question
 from app.logging_config import configure_logging
 configure_logging()
 def update_dropdowns():
-    """Refresh all dropdowns with the updated list of databases"""
     databases = list_databases()
     return [gr.update(choices=databases) for _ in range(6)]
 def extract_text_from_files(files):
     text = ""
     for file in files:
         try:
             if file.name.endswith('.pdf'):
-                text += extract_text_from_pdf(file.name)
             elif file.name.endswith('.docx'):
-                text += extract_text_from_docx(file.name)
             else:
                 with open(file.name, 'r', encoding='utf-8') as f:
                     text += f.read()
@@ -25,12 +45,14 @@ def extract_text_from_files(files):
             logging.error(f"Errore durante la lettura del file {file.name}: {e}")
     return text
 with gr.Blocks() as rag_chatbot:
     gr.Markdown("# Chatbot basato su RAG")
     databases = list_databases()
-    # Definizione dei dropdown prima del loro utilizzo
     db_name_upload = gr.State()
     db_name_list = gr.State()
     db_name_chat = gr.State()
@@ -38,152 +60,260 @@ with gr.Blocks() as rag_chatbot:
     modify_db_old_name = gr.State()
     delete_db_dropdown = gr.State()
     with gr.Tab("Chatbot"):
         with gr.Row():
             with gr.Column(scale=2):
-                db_name_chat = gr.Dropdown(choices=databases, label="Seleziona Database", value="default_db")
-                chatbot = gr.Chatbot(label="Conversazione")
-                with gr.Row():
-                    # Aggiunta upload file direttamente nella chat
-                    file_input = gr.File(
-                        label="Carica PDF per la conversazione",
-                        file_types=[".pdf", ".docx", ".txt"],
-                        file_count="multiple"
-                    )
-                    upload_button = gr.Button("Carica Documenti")
                 question_input = gr.Textbox(
                     label="Fai una domanda",
                     placeholder="Scrivi qui la tua domanda...",
                     lines=2
                 )
                 with gr.Row():
                     ask_button = gr.Button("Invia")
-                    clear_button = gr.Button("Pulisci Chat")
         chat_state = gr.State([])
         def chat_upload_and_respond(files, chat_history, db_name):
             # Estrai il testo dai file
             text = extract_text_from_files(files)
-            # Aggiungi il testo alla chat come messaggio dell'utente
-            chat_history.append((None, "📄 Contenuto dei documenti caricati:"))
-            chat_history.append((None, text))
             return chat_history
         def respond(message, chat_history, db_name):
-            bot_message = answer_question(message, db_name)
-            chat_history.append((message, bot_message))
             return "", chat_history
         def clear_chat():
             return [], []
-        # Eventi
         upload_button.click(
-            chat_upload_and_respond,
             inputs=[file_input, chat_state, db_name_chat],
-            outputs=[chatbot]
         )
         ask_button.click(
-            respond,
             inputs=[question_input, chat_state, db_name_chat],
             outputs=[question_input, chatbot]
         )
         clear_button.click(
-            clear_chat,
             outputs=[chatbot, chat_state]
         )
-    with gr.Tab("Creazione Database"):
-        db_name_input = gr.Textbox(label="Nome Nuovo Database")
-        create_db_button = gr.Button("Crea Database")
-        create_output = gr.Textbox(label="Stato Creazione")
         create_db_button.click(
-            create_database,
-            inputs=db_name_input,
-            outputs=create_output
         ).then(
             update_dropdowns,
-            outputs=[
-                db_name_upload,
-                db_name_list,
-                db_name_chat,
-                db_name_new,
-                modify_db_old_name,
-                delete_db_dropdown
-            ]
         )
-    with gr.Tab("Modifica Database"):
-        modify_db_old_name = gr.Dropdown(choices=databases, label="Database da Rinominare")
-        modify_db_new_name = gr.Textbox(label="Nuovo Nome")
-        modify_db_button = gr.Button("Rinomina Database")
-        modify_output = gr.Textbox(label="Stato Modifica")
         modify_db_button.click(
-            modify_database,
-            inputs=[modify_db_old_name, modify_db_new_name],
             outputs=modify_output
         ).then(
             update_dropdowns,
-            outputs=[
-                db_name_upload,
-                db_name_list,
-                db_name_chat,
-                db_name_new,
-                modify_db_old_name,
-                delete_db_dropdown
-            ]
         )
-    with gr.Tab("Eliminazione Database"):
-        delete_db_dropdown = gr.Dropdown(choices=databases, label="Database da Eliminare")
-        delete_db_button = gr.Button("Elimina Database")
-        delete_output = gr.Textbox(label="Stato Eliminazione")
         delete_db_button.click(
-            delete_database,
-            inputs=delete_db_dropdown,
             outputs=delete_output
         ).then(
             update_dropdowns,
-            outputs=[
-                db_name_upload,
-                db_name_list,
-                db_name_chat,
-                db_name_new,
-                modify_db_old_name,
-                delete_db_dropdown
-            ]
         )
-    with gr.Tab("Carica Documenti"):
-        file_input = gr.File(label="Carica i tuoi documenti", file_types=[".txt", ".pdf", ".docx"], file_count="multiple")
-        db_name_upload = gr.Dropdown(choices=databases, label="Seleziona o Crea Database", value="default_db")
-        upload_button = gr.Button("Indicizza Documenti")
-        upload_output = gr.Textbox(label="Stato")
-        upload_button.click(upload_and_index, inputs=[file_input, db_name_upload], outputs=upload_output)
-    with gr.Tab("Visualizza Documenti Indicizzati"):
-        db_name_list = gr.Dropdown(choices=databases, label="Seleziona Database", value="default_db")
-        list_button = gr.Button("Visualizza Documenti")
-        list_output = gr.Textbox(label="Elenco Documenti")
-        list_button.click(list_indexed_documents, inputs=db_name_list, outputs=list_output)
-    # Adding a new tab for new functionalities
     with gr.Tab("Nuove Funzionalità"):
         gr.Markdown("## Cerca Documenti e Genera Riassunto")
@@ -200,13 +330,13 @@ with gr.Blocks() as rag_chatbot:
             inputs=[search_input, db_name_new],
             outputs=search_output
         )
         # summary_button.click(
         #     generate_summary,
         #     inputs=db_name_new,
         #     outputs=summary_output
         # )
-# Avvio dell'app su Hugging Face
 if __name__ == "__main__":
-    rag_chatbot.launch(share=True)

 import gradio as gr
+import logging
+# Ipotizziamo che tu abbia queste funzioni nel tuo progetto:
+# - list_databases(), create_database(), modify_database(), delete_database()...
+# - list_indexed_files(), upload_and_index(), delete_file_from_database(), etc.
+# - search_documents(), list_indexed_documents()...
+#
+# Se hanno nomi o posizioni diverse, adatta gli import di conseguenza
+from app.document_handling import (
+    list_databases,
+    create_database,
+    modify_database,
+    delete_database,
+    upload_and_index,
+    list_indexed_files,
+    delete_file_from_database,
+    list_indexed_documents,
+    search_documents,
+)
 from app.llm_handling import answer_question
 from app.logging_config import configure_logging
 configure_logging()
 def update_dropdowns():
+    """Refresh all dropdowns with the updated list of databases."""
     databases = list_databases()
+    # Return 6 update() objects because the code has 6 dropdowns to keep in sync
     return [gr.update(choices=databases) for _ in range(6)]
 def extract_text_from_files(files):
+    """Estrae e concatena il testo da PDF, DOCX e TXT."""
     text = ""
     for file in files:
         try:
             if file.name.endswith('.pdf'):
+                text += extract_text_from_pdf(file.name)   # Definita in document_handling
             elif file.name.endswith('.docx'):
+                text += extract_text_from_docx(file.name)  # Definita in document_handling
             else:
                 with open(file.name, 'r', encoding='utf-8') as f:
                     text += f.read()
             logging.error(f"Errore durante la lettura del file {file.name}: {e}")
     return text
 with gr.Blocks() as rag_chatbot:
     gr.Markdown("# Chatbot basato su RAG")
     databases = list_databases()
+    # Questi State() servono per la gestione dei dropdown.
+    # Se non ti servono come stati separati, puoi anche rimuoverli.
     db_name_upload = gr.State()
     db_name_list = gr.State()
     db_name_chat = gr.State()
     modify_db_old_name = gr.State()
     delete_db_dropdown = gr.State()
+    # =============================================
+    #                 TAB: Chatbot
+    # =============================================
     with gr.Tab("Chatbot"):
         with gr.Row():
             with gr.Column(scale=2):
+                # Dropdown per selezionare il DB
+                db_name_chat = gr.Dropdown(
+                    choices=databases,
+                    label="Seleziona Database",
+                    value="default_db"
+                )
+                # Chatbot component
+                chatbot = gr.Chatbot(label="Conversazione", type="messages")
+                # Input domanda
                 question_input = gr.Textbox(
                     label="Fai una domanda",
                     placeholder="Scrivi qui la tua domanda...",
                     lines=2
                 )
+               # Bottoni azione
                 with gr.Row():
                     ask_button = gr.Button("Invia")
+                    clear_button = gr.Button("Pulisci Chat")
+                # File upload con dimensioni ridotte
+                with gr.Row():
+                    file_input = gr.File(
+                        label="Carica PDF/Docx/TXT per la conversazione",
+                        file_types=[".pdf", ".docx", ".txt"],
+                        file_count="multiple",
+                        height="100px",  # Altezza ridotta
+                        scale=3  # Riduce la larghezza relativa
+                    )
+                    upload_button = gr.Button("Carica Documenti", scale=1)
+        # Stato chat
         chat_state = gr.State([])
+        # ----------------------
+        #  FUNZIONI DI CALLBACK
+        # ----------------------
         def chat_upload_and_respond(files, chat_history, db_name):
+            # If chat_history is None, start from an empty history
+            # NOTE(review): db_name is accepted but never used in this body.
+            if chat_history is None:
+                chat_history = []
             # Extract the text from the uploaded files
             text = extract_text_from_files(files)
+            # Append an "assistant" message that shows the loaded text
+            chat_history.append({
+                "role": "assistant",
+                "content": f"📄 Contenuto dei documenti caricati:\n{text}"
+            })
             return chat_history
         def respond(message, chat_history, db_name):
+            if chat_history is None:
+                chat_history = []
+            # `answer_question` is expected to return a list of two messages (user + assistant) — TODO confirm against app.llm_handling
+            new_messages = answer_question(message, db_name)
+            # Append them to the end of the history
+            chat_history.extend(new_messages)
+            # Return an empty string (to clear the Textbox) together with the updated history
             return "", chat_history
         def clear_chat():
+            # Empty the chat: returns two empty lists
             return [], []
+        # ------------------
+        #   EVENTI BOTTONE
+        # ------------------
         upload_button.click(
+            fn=chat_upload_and_respond,
             inputs=[file_input, chat_state, db_name_chat],
+            outputs=chatbot
         )
         ask_button.click(
+            fn=respond,
             inputs=[question_input, chat_state, db_name_chat],
             outputs=[question_input, chatbot]
         )
         clear_button.click(
+            fn=clear_chat,
             outputs=[chatbot, chat_state]
         )
+    # =============================================
+    #            TAB: Gestione Database
+    # =============================================
+    with gr.Tab("Gestione Database"):
+        gr.Markdown("## Operazioni sui Database")
+        with gr.Row():
+            with gr.Column():
+                gr.Markdown("### Crea Database")
+                db_name_input = gr.Textbox(label="Nome Nuovo Database")
+                create_db_button = gr.Button("Crea Database")
+                create_output = gr.Textbox(label="Stato Creazione")
+            with gr.Column():
+                gr.Markdown("### Rinomina Database")
+                modify_db_old_name = gr.Dropdown(choices=databases, label="Database da Rinominare")
+                modify_db_new_name = gr.Textbox(label="Nuovo Nome")
+                modify_db_button = gr.Button("Rinomina Database")
+                modify_output = gr.Textbox(label="Stato Modifica")
+            with gr.Column():
+                gr.Markdown("### Elimina Database")
+                delete_db_dropdown = gr.Dropdown(choices=databases, label="Database da Eliminare")
+                delete_db_button = gr.Button("Elimina Database")
+                delete_output = gr.Textbox(label="Stato Eliminazione")
+        # Eventi per i pulsanti di gestione DB
         create_db_button.click(
+            create_database,           # funzione
+            inputs=db_name_input,      # input
+            outputs=create_output      # output
         ).then(
             update_dropdowns,
+            outputs=[db_name_upload, db_name_list, db_name_chat, db_name_new, modify_db_old_name, delete_db_dropdown]
         )
         modify_db_button.click(
+            modify_database,
+            inputs=[modify_db_old_name, modify_db_new_name],
             outputs=modify_output
         ).then(
             update_dropdowns,
+            outputs=[db_name_upload, db_name_list, db_name_chat, db_name_new, modify_db_old_name, delete_db_dropdown]
         )
         delete_db_button.click(
+            delete_database,
+            inputs=delete_db_dropdown,
             outputs=delete_output
         ).then(
             update_dropdowns,
+            outputs=[db_name_upload, db_name_list, db_name_chat, db_name_new, modify_db_old_name, delete_db_dropdown]
         )
+    # =============================================
+    #         TAB: Gestione Documenti
+    # =============================================
+    with gr.Tab("Gestione Documenti"):
+        with gr.Column():
+            gr.Markdown("### Carica Documenti")
+            with gr.Row():
+                file_input = gr.File(
+                    label="Carica i tuoi documenti",
+                    file_types=[".txt", ".pdf", ".docx"],
+                    file_count="multiple"
+                )
+                db_name_upload = gr.Dropdown(
+                    choices=databases,
+                    label="Seleziona Database",
+                    value="default_db"
+                )
+            with gr.Row():
+                title_input = gr.Textbox(label="Titolo del documento")
+                author_input = gr.Textbox(label="Autore")
+            upload_button = gr.Button("Indicizza Documenti")
+            upload_output = gr.Textbox(label="Stato Upload")
+            with gr.Column():
+                gr.Markdown("### Documenti nel Database")
+                db_name_list = gr.Dropdown(
+                    choices=databases,
+                    label="Seleziona Database",
+                    value="default_db"
+                )
+                list_button = gr.Button("Visualizza Files")
+                list_output = gr.Textbox(label="Files nel Database")
+                delete_file_input = gr.Textbox(label="Nome file da eliminare")
+                delete_file_button = gr.Button("Elimina File")
+                delete_file_output = gr.Textbox(label="Stato Eliminazione")
+        # Eventi
+        upload_button.click(
+            upload_and_index,
+            inputs=[file_input, title_input, author_input, db_name_upload],
+            outputs=upload_output
+        ).then(
+            list_indexed_files,
+            inputs=db_name_list,
+            outputs=list_output
+        )
+        list_button.click(
+            list_indexed_files,
+            inputs=db_name_list,
+            outputs=list_output
+        )
+        delete_file_button.click(
+            delete_file_from_database,
+            inputs=[delete_file_input, db_name_list],
+            outputs=delete_file_output
+        ).then(
+            list_indexed_files,
+            inputs=db_name_list,
+            outputs=list_output
+        ).then(
+            update_dropdowns,
+            outputs=[db_name_upload, db_name_list, db_name_chat, db_name_new, modify_db_old_name, delete_db_dropdown]
+        )
+    # =============================================
+    #      TAB: Visualizza Documenti Indicizzati
+    # =============================================
+    with gr.Tab("Visualizza Documenti Indicizzati"):
+        with gr.Column():
+            gr.Markdown("### Documenti nel Database")
+            db_name_list = gr.Dropdown(
+                choices=databases,
+                label="Seleziona Database",
+                value="default_db",
+                interactive=True
+            )
+            list_button = gr.Button("Visualizza Documenti")
+            list_output = gr.Textbox(
+                label="Elenco Documenti",
+                lines=10,
+                interactive=False,
+                value="Clicca 'Visualizza Documenti' per vedere l'elenco"
+            )
+            list_button.click(
+                fn=list_indexed_documents,
+                inputs=[db_name_list],
+                outputs=[list_output],
+                api_name="list_docs"
+            )
+    # =============================================
+    #         TAB: Nuove Funzionalità
+    # =============================================
     with gr.Tab("Nuove Funzionalità"):
         gr.Markdown("## Cerca Documenti e Genera Riassunto")
             inputs=[search_input, db_name_new],
             outputs=search_output
         )
+        # Esempio di eventuale generazione riassunto
         # summary_button.click(
         #     generate_summary,
         #     inputs=db_name_new,
         #     outputs=summary_output
         # )
+# Avvio dell'app
 if __name__ == "__main__":
+    rag_chatbot.launch()

app/__pycache__/__init__.cpython-310.pyc CHANGED Viewed

Binary files a/app/__pycache__/__init__.cpython-310.pyc and b/app/__pycache__/__init__.cpython-310.pyc differ

app/__pycache__/config.cpython-310.pyc CHANGED Viewed

Binary files a/app/__pycache__/config.cpython-310.pyc and b/app/__pycache__/config.cpython-310.pyc differ

app/__pycache__/document_handling.cpython-310.pyc CHANGED Viewed

Binary files a/app/__pycache__/document_handling.cpython-310.pyc and b/app/__pycache__/document_handling.cpython-310.pyc differ

app/__pycache__/llm_handling.cpython-310.pyc CHANGED Viewed

Binary files a/app/__pycache__/llm_handling.cpython-310.pyc and b/app/__pycache__/llm_handling.cpython-310.pyc differ

app/__pycache__/logging_config.cpython-310.pyc CHANGED Viewed

Binary files a/app/__pycache__/logging_config.cpython-310.pyc and b/app/__pycache__/logging_config.cpython-310.pyc differ

app/config.py CHANGED Viewed

@@ -1,4 +1,10 @@
 import os
 # Configurazione del modello
-OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

 import os
+from dotenv import load_dotenv
+# Load the environment variables from the .env file
+load_dotenv()
 # Model configuration
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+# Fail at module import when the key is unset or empty
+if not OPENAI_API_KEY:
+    raise ValueError("OPENAI_API_KEY non trovata. Verifica il file .env")

app/document_handling.py CHANGED Viewed

@@ -7,10 +7,45 @@ import shutil
 import PyPDF2
 from docx import Document
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 # Initialize the text splitter
 text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
 def extract_text_from_pdf(file_path):
     with open(file_path, 'rb') as f:
         reader = PyPDF2.PdfReader(f)
@@ -26,57 +61,27 @@ def extract_text_from_docx(file_path):
         text += para.text + "\n"
     return text
-def upload_and_index(files, db_name="default_db"):
-    if not files:
-        logging.warning("Nessun file fornito per l'indicizzazione.")
-        return "Nessun file caricato."
-    documents = []
-    for file in files:
-        try:
-            if file.name.endswith('.pdf'):
-                text = extract_text_from_pdf(file.name)
-            elif file.name.endswith('.docx'):
-                text = extract_text_from_docx(file.name)
-            else:
-                with open(file.name, 'r', encoding='utf-8') as f:
-                    text = f.read()
-            # Split the text into chunks
-            chunks = text_splitter.split_text(text)
-            documents.extend(chunks)
-        except Exception as e:
-            logging.error(f"Errore durante la lettura del file {file.name}: {e}")
-            continue
-    # Creazione dell'indice con FAISS e Hugging Face embeddings
-    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
-    vectorstore = FAISS.from_texts(documents, embeddings)
-    db_path = f"faiss_index_{db_name}"
-    vectorstore.save_local(db_path)
-    logging.info(f"Documenti indicizzati con successo nel database {db_name}.")
-    return f"Documenti indicizzati con successo nel database {db_name}!"
-def list_indexed_documents(db_name="default_db"):
     db_path = f"faiss_index_{db_name}"
     if not os.path.exists(db_path):
         logging.warning(f"L'indice FAISS per il database {db_name} non esiste.")
-        return "Nessun documento indicizzato."
-    # Carica l'indice FAISS
     embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
     vectorstore = FAISS.load_local(db_path, embeddings, allow_dangerous_deserialization=True)
-    # Ottieni i documenti dall'indice
-    documents = [doc.page_content for doc in vectorstore.docstore._dict.values()]
-    num_chunks = len(documents)
-    if not documents:
-        return "Nessun documento trovato nell'indice."
-    # Formatta la lista dei documenti
-    document_list = "\n".join([f"{i+1}. {doc}" for i, doc in enumerate(documents)])
-    return f"Documenti nel database {db_name} (Numero di chunk: {num_chunks}):\n{document_list}"
 def create_database(db_name):
     logging.info(f"Creating database: {db_name}")
     db_path = f"faiss_index_{db_name}"
@@ -132,11 +137,165 @@ def list_databases():
     except Exception as e:
         logging.error(f"Error listing databases: {e}")
         return []
-def generate_summary(db_name="default_db"):
-    # Placeholder for summarization logic
-    return "This is a summary of the documents in the database."
 def search_documents(query, db_name="default_db"):
     db_path = f"faiss_index_{db_name}"
     if not os.path.exists(db_path):
@@ -154,4 +313,8 @@ def search_documents(query, db_name="default_db"):
     # Collect the document contents
     results = [doc.page_content for doc in docs]
-    return "\n\n".join(results)

 import PyPDF2
 from docx import Document
 from langchain.text_splitter import RecursiveCharacterTextSplitter
+from dataclasses import dataclass
+import json
+from datetime import datetime
 # Initialize the text splitter
 text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
+# -------------- UTILITY FUNCTIONS --------------
+@dataclass
+class DocumentMetadata:
+    """Per-document record stored in a database's metadata.json file."""
+    filename: str
+    title: str
+    author: str
+    upload_date: str
+    chunks: int
+    def to_dict(self):
+        """Return the record as a plain dict suitable for JSON serialization."""
+        exported = ("filename", "title", "author", "upload_date", "chunks")
+        return {key: getattr(self, key) for key in exported}
+def save_metadata(metadata_list, db_name):
+    """Append document records to the database's metadata.json file.
+
+    Args:
+        metadata_list: Iterable of objects exposing ``to_dict()``.
+        db_name: Database name; the file lives in ``faiss_index_<db_name>``.
+
+    Records already present in the file are kept; the new ones are added
+    after them.
+    """
+    db_path = f"faiss_index_{db_name}"
+    metadata_file = os.path.join(db_path, "metadata.json")
+    # The index directory may not exist yet when this is called on its own.
+    os.makedirs(db_path, exist_ok=True)
+    existing_metadata = []
+    if os.path.exists(metadata_file):
+        # Explicit UTF-8 so reading does not depend on the platform locale.
+        with open(metadata_file, 'r', encoding='utf-8') as f:
+            existing_metadata = json.load(f)
+    existing_metadata.extend(m.to_dict() for m in metadata_list)
+    with open(metadata_file, 'w', encoding='utf-8') as f:
+        json.dump(existing_metadata, f, indent=2)
 def extract_text_from_pdf(file_path):
     with open(file_path, 'rb') as f:
         reader = PyPDF2.PdfReader(f)
         text += para.text + "\n"
     return text
+# -------------- CHATBOT TAB FUNCTIONS --------------
+def answer_question(question, db_name="default_db"):
     db_path = f"faiss_index_{db_name}"
     if not os.path.exists(db_path):
         logging.warning(f"L'indice FAISS per il database {db_name} non esiste.")
+        return "Database non trovato."
     embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
     vectorstore = FAISS.load_local(db_path, embeddings, allow_dangerous_deserialization=True)
+    # Perform a similarity search
+    docs = vectorstore.similarity_search(question)
+    if not docs:
+        return "Nessun documento corrispondente alla query."
+    # Collect the document contents
+    results = [doc.page_content for doc in docs]
+    return "\n\n".join(results)
+# -------------- DATABASE MANAGEMENT TAB FUNCTIONS --------------
 def create_database(db_name):
     logging.info(f"Creating database: {db_name}")
     db_path = f"faiss_index_{db_name}"
     except Exception as e:
         logging.error(f"Error listing databases: {e}")
         return []
+# -------------- DOCUMENT MANAGEMENT TAB FUNCTIONS --------------
+def upload_and_index(files, title, author, db_name="default_db"):
+    """Extract, chunk and index uploaded files into a FAISS database.
+
+    Args:
+        files: Uploaded file objects exposing a ``name`` path attribute.
+        title: Title recorded for every uploaded document.
+        author: Author recorded for every uploaded document.
+        db_name: Target database; its index lives in ``faiss_index_<db_name>``.
+
+    Returns:
+        A status message describing the outcome. Files that cannot be read
+        are logged and skipped.
+    """
+    if not files:
+        return "Nessun file caricato."
+    documents = []
+    doc_metadata = []
+    for file in files:
+        try:
+            # Compare extensions case-insensitively so ".PDF" is not read as plain text.
+            lowered_name = file.name.lower()
+            if lowered_name.endswith('.pdf'):
+                text = extract_text_from_pdf(file.name)
+            elif lowered_name.endswith('.docx'):
+                text = extract_text_from_docx(file.name)
+            else:
+                with open(file.name, 'r', encoding='utf-8') as f:
+                    text = f.read()
+            chunks = text_splitter.split_text(text)
+            source_name = os.path.basename(file.name)
+            # Document-level metadata
+            doc_meta = DocumentMetadata(
+                filename=source_name,
+                title=title,
+                author=author,
+                upload_date=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                chunks=len(chunks)
+            )
+            # Chunk-level metadata
+            for i, chunk in enumerate(chunks):
+                documents.append({
+                    "content": chunk,
+                    "source": source_name,
+                    "title": title,
+                    "author": author,
+                    "chunk_index": i,
+                    "total_chunks": len(chunks),
+                    "upload_date": doc_meta.upload_date
+                })
+            doc_metadata.append(doc_meta)
+        except Exception as e:
+            logging.error(f"Errore durante la lettura del file {file.name}: {e}")
+    if not documents:
+        return "Nessun documento processato."
+    try:
+        db_path = f"faiss_index_{db_name}"
+        os.makedirs(db_path, exist_ok=True)
+        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+        texts = [doc["content"] for doc in documents]
+        metadatas = [{k: v for k, v in doc.items() if k != "content"} for doc in documents]
+        if os.path.exists(os.path.join(db_path, "index.faiss")):
+            # Extend the existing index instead of overwriting it, so the
+            # vectors stay in step with metadata.json, which is append-only.
+            # NOTE: load_local unpickles the stored docstore; only use it on trusted index directories.
+            vectorstore = FAISS.load_local(db_path, embeddings, allow_dangerous_deserialization=True)
+            vectorstore.add_texts(texts, metadatas=metadatas)
+        else:
+            vectorstore = FAISS.from_texts(texts, embeddings, metadatas=metadatas)
+        vectorstore.save_local(db_path)
+        # Persist the document-level metadata
+        save_metadata(doc_metadata, db_name)
+        return f"Documenti indicizzati con successo nel database {db_name}!"
+    except Exception as e:
+        logging.error(f"Errore durante l'indicizzazione: {e}")
+        return f"Errore durante l'indicizzazione: {e}"
+def list_indexed_files(db_name="default_db"):
+    """Return a formatted listing of the files recorded in metadata.json.
+
+    Returns a fixed message when the metadata file is missing or empty, and
+    an error message (also logged) when it cannot be read.
+    """
+    metadata_file = os.path.join(f"faiss_index_{db_name}", "metadata.json")
+    if not os.path.exists(metadata_file):
+        return "Nessun file nel database."
+    try:
+        with open(metadata_file, 'r') as handle:
+            records = json.load(handle)
+        entries = [
+            "".join((
+                f"📄 {record['title']}\n",
+                f"   Autore: {record['author']}\n",
+                f"   File: {record['filename']}\n",
+                f"   Chunks: {record['chunks']}\n",
+                f"   Caricato il: {record['upload_date']}\n",
+            ))
+            for record in records
+        ]
+        if not entries:
+            return "Nessun documento nel database."
+        return "\n".join(entries)
+    except Exception as e:
+        logging.error(f"Errore nella lettura dei metadati: {e}")
+        return f"Errore nella lettura dei metadati: {e}"
+def delete_file_from_database(file_name, db_name="default_db"):
+    """Remove a file's entry from the database's file registries.
+
+    Entries whose ``filename`` equals ``file_name`` are dropped from
+    metadata.json; the name is also dropped from file_list.txt when that
+    file exists.
+
+    Args:
+        file_name: File name to remove, as stored in the registry.
+        db_name: Database name; files live in ``faiss_index_<db_name>``.
+
+    Returns:
+        A status message: removed, not found, database missing, or error.
+    """
+    db_path = f"faiss_index_{db_name}"
+    file_list_path = os.path.join(db_path, "file_list.txt")
+    metadata_file = os.path.join(db_path, "metadata.json")
+    if not (os.path.exists(file_list_path) or os.path.exists(metadata_file)):
+        return "Database non trovato."
+    try:
+        removed = False
+        if os.path.exists(metadata_file):
+            with open(metadata_file, "r", encoding="utf-8") as f:
+                metadata = json.load(f)
+            kept_docs = [doc for doc in metadata if doc.get("filename") != file_name]
+            if len(kept_docs) != len(metadata):
+                removed = True
+                with open(metadata_file, "w", encoding="utf-8") as f:
+                    json.dump(kept_docs, f, indent=2)
+        if os.path.exists(file_list_path):
+            with open(file_list_path, "r") as f:
+                names = [line.strip() for line in f]
+            kept_names = [name for name in names if name != file_name]
+            if len(kept_names) != len(names):
+                removed = True
+            with open(file_list_path, "w") as f:
+                f.writelines(f"{name}\n" for name in kept_names)
+        # TODO(review): the chunks already embedded in the FAISS index are not
+        # removed here; only the registries are updated.
+        if not removed:
+            return f"File {file_name} non trovato nel database {db_name}."
+        return f"File {file_name} rimosso dal database {db_name}."
+    except Exception as e:
+        return f"Errore durante la rimozione del file: {e}"
+# -------------- DOCUMENT VISUALIZATION TAB FUNCTIONS --------------
+def list_indexed_documents(db_name="default_db"):
+    """Return a formatted overview of the documents recorded in metadata.json.
+
+    Distinguishes a missing database directory, a missing metadata file and
+    an empty metadata list; read errors are logged and returned as a message.
+    """
+    db_path = f"faiss_index_{db_name}"
+    if not os.path.exists(db_path):
+        return f"Il database {db_name} non esiste."
+    metadata_file = os.path.join(db_path, "metadata.json")
+    if not os.path.exists(metadata_file):
+        return f"Nessun documento nel database {db_name}."
+    try:
+        with open(metadata_file, 'r') as handle:
+            records = json.load(handle)
+        if not records:
+            return "Nessun documento trovato nel database."
+        lines = ["📚 Documenti nel database:"]
+        for record in records:
+            lines.append(f"\n📄 Documento: {record['title']}")
+            lines.append(f"   📝 Autore: {record['author']}")
+            lines.append(f"   📁 File: {record['filename']}")
+            lines.append(f"   🕒 Caricato il: {record['upload_date']}")
+            lines.append(f"   📑 Chunks: {record['chunks']}")
+        result = "\n".join(lines)
+        logging.info(f"Documenti trovati nel database {db_name}: {result}")
+        return result
+    except Exception as e:
+        error_msg = f"Errore nella lettura dei metadati: {e}"
+        logging.error(error_msg)
+        return error_msg
+# -------------- NEW FEATURES TAB FUNCTIONS --------------
 def search_documents(query, db_name="default_db"):
     db_path = f"faiss_index_{db_name}"
     if not os.path.exists(db_path):
     # Collect the document contents
     results = [doc.page_content for doc in docs]
+    return "\n\n".join(results)
+def generate_summary(db_name="default_db"):
+    """Return a fixed placeholder summary; ``db_name`` is not used yet."""
+    placeholder = "This is a summary of the documents in the database."
+    return placeholder

app/import pytest.py ADDED Viewed

	@@ -0,0 +1,58 @@

+import pytest
+from unittest.mock import Mock, patch
+import os
+from .llm_handling import answer_question
+# app/test_llm_handling.py
+@pytest.fixture
+def mock_embeddings():
+    with patch('langchain_community.embeddings.HuggingFaceEmbeddings') as mock:
+        yield mock
+@pytest.fixture
+def mock_vectorstore():
+    with patch('langchain_community.vectorstores.FAISS') as mock:
+        mock_instance = Mock()
+        mock_instance.as_retriever.return_value = Mock()
+        mock.load_local.return_value = mock_instance
+        yield mock
+@pytest.fixture
+def mock_chat_openai():
+    with patch('langchain_openai.ChatOpenAI') as mock:
+        yield mock
+def test_database_not_found():
+    result = answer_question("test question", "nonexistent_db")
+    assert len(result) == 2
+    assert result[0]["role"] == "user"
+    assert result[0]["content"] == "test question"
+    assert result[1]["role"] == "assistant"
+    assert result[1]["content"] == "Database non trovato"
+@patch('os.path.exists', return_value=True)
+def test_successful_answer(mock_exists, mock_embeddings, mock_vectorstore, mock_chat_openai):
+    mock_qa_chain = Mock()
+    mock_qa_chain.return_value = {"result": "Test answer"}
+    with patch('langchain.chains.RetrievalQA.from_chain_type', return_value=mock_qa_chain):
+        result = answer_question("test question", "test_db")
+        assert len(result) == 2
+        assert result[0]["role"] == "user"
+        assert result[0]["content"] == "test question"
+        assert result[1]["role"] == "assistant"
+        assert result[1]["content"] == "Test answer"
+@patch('os.path.exists', return_value=True)
+def test_error_handling(mock_exists, mock_embeddings):
+    mock_embeddings.side_effect = Exception("Test error")
+    result = answer_question("test question", "test_db")
+    assert len(result) == 2
+    assert result[0]["role"] == "user"
+    assert result[0]["content"] == "test question"
+    assert result[1]["role"] == "assistant"
+    assert "Si è verificato un errore: Test error" in result[1]["content"]

app/llm_handling.py CHANGED Viewed

@@ -1,104 +1,118 @@
 import logging
-from langchain_openai import ChatOpenAI
 from langchain_community.vectorstores import FAISS
 from langchain_community.embeddings import HuggingFaceEmbeddings
-from langchain.chains import RetrievalQA
 from app.config import OPENAI_API_KEY
-import gradio as gr  # Da aggiungere per le funzioni che usano gr.Dropdown.update()
-import os
-import shutil
 logging.basicConfig(level=logging.INFO)
-def answer_question(question, db_name, chat_history=[]):
     logging.info(f"Inizio elaborazione domanda: {question} per database: {db_name}")
     try:
-        # 1. Carica il database FAISS
         embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
         db_path = f"faiss_index_{db_name}"
         if not os.path.exists(db_path):
-            return "Database non trovato. Seleziona un database valido."
-        vectorstore = FAISS.load_local(db_path, embeddings, allow_dangerous_deserialization=True)
-        # 2. Recupera più contesto rilevante
-        docs = vectorstore.similarity_search(question, k=5)  # Aumentato da 3 a 5
-        context = "\n\n=== Documento ===\n".join([doc.page_content for doc in docs])
-        logging.info(f"Contesto recuperato: {context}")
-        # 3. Prompt migliorato per gestire più contenuti
-        prompt = f"""Analizza i seguenti documenti e fornisci una risposta strutturata nel seguente formato.
-        Mantieni un tono conversazionale e fai riferimento alle domande precedenti se pertinenti.
-        Cronologia della conversazione:
-        {chat_history}
-        SINTESI DEI DOCUMENTI:
-        - Elenca brevemente i principali argomenti trovati nei documenti (massimo 3 punti)
-        ANALISI:
-        - Numero di documenti analizzati: [numero]
-        - Principali concetti rilevanti per la domanda
-        RISPOSTA DETTAGLIATA:
-        - Risposta completa e conversazionale basata sui documenti
-        Documenti di riferimento:
-        {context}
-        Domanda: {question}
-        """
-        # 4. Inizializza l'LLM
-        llm = ChatOpenAI(
-            model_name="gpt-4o-mini",  # Aggiornato a GPT-4 Turbo
-            openai_api_key=OPENAI_API_KEY,
-            temperature=0.7,  # Aumentato per risposte più creative
-            max_tokens=2000   # Aumentato per risposte più complete
         )
-        # 5. Catena RAG ottimizzata con più contesto
-        chain = RetrievalQA.from_chain_type(
-            llm=llm,
-            chain_type="stuff",
-            retriever=vectorstore.as_retriever(
-                search_kwargs={
-                    "k": 8,  # Aumentato da 5 a 8
-                    "fetch_k": 15,  # Aumentato da 10 a 15
-                    "score_threshold": 0.3  # Ridotto da 0.5 a 0.3 per includere più contesto
-                }
-            ),
-            return_source_documents=True,
-            verbose=True
-        )
-        # 6. Ottieni la risposta
-        result = chain({"query": prompt})
-        answer = result["result"]
-        logging.info(f"Risposta generata: {answer}")
-        return answer
     except Exception as e:
         logging.error(f"Errore durante la generazione della risposta: {e}")
-        return f"Si è verificato un errore: {str(e)}"
-# Nel document_handling.py, aggiornare delete_database per restituire anche l'aggiornamento del dropdown
 def delete_database(db_name):
     db_path = f"faiss_index_{db_name}"
     if not os.path.exists(db_path):
-        return f"Il database {db_name} non esiste.", gr.Dropdown.update(choices=list_databases())
     try:
         shutil.rmtree(db_path)
         logging.info(f"Database {db_name} eliminato con successo.")
-        return f"Database {db_name} eliminato con successo.", gr.Dropdown.update(choices=list_databases())
     except OSError as e:
         logging.error(f"Impossibile eliminare il database {db_name}: {e}")
-        return f"Impossibile eliminare il database {db_name}: {e}", gr.Dropdown.update(choices=list_databases())
-# Manca la chiamata a ensure_default_db()
 if __name__ == "__main__":
-    ensure_default_db()  # Aggiungere questa chiamata
-    rag_chatbot.launch(share=True)

 import logging
+import os
+import shutil
+from openai import OpenAI
 from langchain_community.vectorstores import FAISS
 from langchain_community.embeddings import HuggingFaceEmbeddings
+import gradio as gr
 from app.config import OPENAI_API_KEY
+# If you have database helper functions (list_databases, ensure_default_db, etc.),
+# import them from the correct module:
+# from app.document_handling import list_databases, ensure_default_db
 logging.basicConfig(level=logging.INFO)
+def answer_question(question, db_name, chat_history=None):
+    """
+    Answer 'question' using the documents stored in database 'db_name'.
+    Returns a list of 2 messages in the format:
+      [
+        {"role": "user", "content": <question>},
+        {"role": "assistant", "content": <answer>}
+      ]
+    The chunks retrieved by the similarity search are written to the log.
+    Any exception is caught and reported as the assistant message.
+    """
+    # NOTE(review): chat_history is normalised here but never read afterwards,
+    # so earlier turns are not sent to the model — confirm this is intended.
+    if chat_history is None:
+        chat_history = []
     logging.info(f"Inizio elaborazione domanda: {question} per database: {db_name}")
     try:
+        # The embedding model is created before the existence check below.
         embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
         db_path = f"faiss_index_{db_name}"
         if not os.path.exists(db_path):
+            logging.warning(f"Database {db_name} non trovato.")
+            return [
+                {"role": "user", "content": question},
+                {"role": "assistant", "content": "Database non trovato"}
+            ]
+        # Load the FAISS index
+        vectorstore = FAISS.load_local(db_path, embeddings, allow_dangerous_deserialization=True)
+        # Retrieve the 3 most similar documents (chunks)
+        relevant_docs = vectorstore.similarity_search(question, k=3)
+        # Log the retrieved chunks
+        for idx, doc in enumerate(relevant_docs):
+            logging.info(f"--- Chunk {idx+1} ---")
+            logging.info(doc.page_content)
+            logging.info("---------------------")
+        # Build the context from the retrieved documents
+        context = "\n".join([doc.page_content for doc in relevant_docs])
+        client = OpenAI(api_key=OPENAI_API_KEY)
+        messages = [
+            {"role": "system", "content": f"Usa questo contesto per rispondere: {context}"},
+            {"role": "user", "content": question}
+        ]
+        # Call the OpenAI chat completions API
+        response = client.chat.completions.create(
+            model="gpt-3.5-turbo",
+            messages=messages,
+            temperature=0,
+            max_tokens=2048
         )
+        answer = response.choices[0].message.content
+        return [
+            {"role": "user", "content": question},
+            {"role": "assistant", "content": answer}
+        ]
     except Exception as e:
         logging.error(f"Errore durante la generazione della risposta: {e}")
+        return [
+            {"role": "user", "content": question},
+            {"role": "assistant", "content": f"Si è verificato un errore: {str(e)}"}
+        ]
 def delete_database(db_name):
+    """
+    Delete the FAISS database directory that corresponds to 'db_name'.
+    Returns a status message and a Gradio update for the database dropdown.
+    """
     db_path = f"faiss_index_{db_name}"
     if not os.path.exists(db_path):
+        # gr.update replaces the per-component .update() helpers removed in newer Gradio releases.
+        return f"Il database {db_name} non esiste.", gr.update(choices=[])
     try:
         shutil.rmtree(db_path)
         logging.info(f"Database {db_name} eliminato con successo.")
+        # If a list_databases() function is available, use it to refresh the dropdown choices
+        return f"Database {db_name} eliminato con successo.", gr.update(choices=[])
     except OSError as e:
         logging.error(f"Impossibile eliminare il database {db_name}: {e}")
+        return f"Impossibile eliminare il database {db_name}: {e}", gr.update(choices=[])
 if __name__ == "__main__":
+    # Se esiste una funzione ensure_default_db(), decommenta:
+    # ensure_default_db()
+    # Qui potresti testare la funzione answer_question o avviare
+    # il tuo server Gradio. Ad esempio:
+    #
+    # from app.interface import rag_chatbot
+    # rag_chatbot.launch(share=True)
+    pass

app/llm_handling_3.py ADDED Viewed

	@@ -0,0 +1,76 @@

+import logging
+from openai import OpenAI
+from langchain_community.vectorstores import FAISS
+from langchain_community.embeddings import HuggingFaceEmbeddings
+from app.config import OPENAI_API_KEY
+import gradio as gr
+import os
+import shutil
+logging.basicConfig(level=logging.INFO)
+def answer_question(question, db_name, chat_history=None):
+    """Answer ``question`` from the documents indexed in ``db_name``.
+
+    Args:
+        question: The user's question.
+        db_name: Database name; the index is read from ``faiss_index_<db_name>``.
+        chat_history: Optional earlier turns as ``{"role", "content"}`` dicts;
+            entries in any other shape are ignored.
+
+    Returns:
+        A two-item list: the user message followed by the assistant message
+        (the answer, or an error description if anything failed).
+    """
+    if chat_history is None:
+        chat_history = []
+    logging.info(f"Inizio elaborazione domanda: {question} per database: {db_name}")
+    try:
+        db_path = f"faiss_index_{db_name}"
+        # Check for the index first so a missing database does not pay for
+        # loading the embedding model.
+        if not os.path.exists(db_path):
+            return [{"role": "user", "content": question},
+                   {"role": "assistant", "content": "Database non trovato"}]
+        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+        # NOTE: load_local unpickles the stored docstore; only use it on trusted index directories.
+        vectorstore = FAISS.load_local(db_path, embeddings, allow_dangerous_deserialization=True)
+        relevant_docs = vectorstore.similarity_search(question, k=3)
+        # Build the context from the retrieved documents
+        context = "\n".join([doc.page_content for doc in relevant_docs])
+        client = OpenAI(api_key=OPENAI_API_KEY)
+        # Forward earlier turns so follow-up questions keep their context.
+        previous_turns = [
+            {"role": turn["role"], "content": turn["content"]}
+            for turn in chat_history
+            if isinstance(turn, dict)
+            and turn.get("role") in ("user", "assistant")
+            and isinstance(turn.get("content"), str)
+        ]
+        messages = [
+            {"role": "system", "content": f"Usa questo contesto per rispondere: {context}"},
+            *previous_turns,
+            {"role": "user", "content": question}
+        ]
+        response = client.chat.completions.create(
+            model="gpt-3.5-turbo",  # Changed from gpt-4o-mini to a supported model
+            messages=messages,
+            temperature=0,
+            max_tokens=2048
+        )
+        answer = response.choices[0].message.content
+        return [
+            {"role": "user", "content": question},
+            {"role": "assistant", "content": answer}
+        ]
+    except Exception as e:
+        logging.error(f"Errore durante la generazione della risposta: {e}")
+        return [
+            {"role": "user", "content": question},
+            {"role": "assistant", "content": f"Si è verificato un errore: {str(e)}"}
+        ]
+# In document_handling.py, update delete_database so it also returns the dropdown update
+def delete_database(db_name):
+    """Delete the FAISS database directory for ``db_name``.
+
+    Returns:
+        A ``(status message, dropdown update)`` tuple; the update carries the
+        database list as it stands after the attempt.
+    """
+    # Imported here because this module does not define list_databases itself.
+    from app.document_handling import list_databases
+    db_path = f"faiss_index_{db_name}"
+    if not os.path.exists(db_path):
+        # gr.update replaces the per-component .update() helpers removed in newer Gradio releases.
+        return f"Il database {db_name} non esiste.", gr.update(choices=list_databases())
+    try:
+        shutil.rmtree(db_path)
+        logging.info(f"Database {db_name} eliminato con successo.")
+        return f"Database {db_name} eliminato con successo.", gr.update(choices=list_databases())
+    except OSError as e:
+        logging.error(f"Impossibile eliminare il database {db_name}: {e}")
+        return f"Impossibile eliminare il database {db_name}: {e}", gr.update(choices=list_databases())
+# Manca la chiamata a ensure_default_db()
+if __name__ == "__main__":
+    ensure_default_db()  # Aggiungere questa chiamata
+    rag_chatbot.launch(share=True)

app/test_llm_handling.py ADDED Viewed

	@@ -0,0 +1,30 @@

+def test_database_not_found():
+    result = answer_question("test question", "nonexistent_db")
+    assert len(result) == 1
+    assert len(result[0]) == 2
+    assert result[0][0] == "test question"
+    assert result[0][1] == "Database non trovato"
+@patch('os.path.exists', return_value=True)
+def test_successful_answer(mock_exists, mock_embeddings, mock_vectorstore, mock_chat_openai):
+    mock_qa_chain = Mock()
+    mock_qa_chain.return_value = {"result": "Test answer"}
+    with patch('langchain.chains.RetrievalQA.from_chain_type', return_value=mock_qa_chain):
+        result = answer_question("test question", "test_db")
+        assert len(result) == 1
+        assert len(result[0]) == 2
+        assert result[0][0] == "test question"
+        assert result[0][1] == "Test answer"
+@patch('os.path.exists', return_value=True)
+def test_error_handling(mock_exists, mock_embeddings):
+    mock_embeddings.side_effect = Exception("Test error")
+    result = answer_question("test question", "test_db")
+    assert len(result) == 1
+    assert len(result[0]) == 2
+    assert result[0][0] == "test question"
+    assert "Si è verificato un errore: Test error" in result[0][1]

app_1.py ADDED Viewed

	@@ -0,0 +1,275 @@

+import gradio as gr
+from app.document_handling import *
+from app.llm_handling import answer_question
+from app.logging_config import configure_logging
+configure_logging()
+def update_dropdowns():
+    """Return six dropdown updates carrying the current list of databases."""
+    current = list_databases()
+    refreshed = []
+    for _ in range(6):
+        refreshed.append(gr.update(choices=current))
+    return refreshed
+def extract_text_from_files(files):
+    """Concatenate the text extracted from the uploaded files.
+
+    Args:
+        files: File objects exposing a ``name`` path attribute, or ``None``
+            when nothing was uploaded.
+
+    Returns:
+        The combined text; files that fail to read are logged and skipped.
+    """
+    text = ""
+    # Treat a missing upload as "no files" instead of failing on iteration.
+    for file in files or []:
+        try:
+            # Compare extensions case-insensitively so ".PDF" is not read as plain text.
+            lowered_name = file.name.lower()
+            if lowered_name.endswith('.pdf'):
+                text += extract_text_from_pdf(file.name)
+            elif lowered_name.endswith('.docx'):
+                text += extract_text_from_docx(file.name)
+            else:
+                with open(file.name, 'r', encoding='utf-8') as f:
+                    text += f.read()
+        except Exception as e:
+            logging.error(f"Errore durante la lettura del file {file.name}: {e}")
+    return text
+with gr.Blocks() as rag_chatbot:
+    gr.Markdown("# Chatbot basato su RAG")
+    databases = list_databases()
+    # Definizione dei dropdown prima del loro utilizzo
+    db_name_upload = gr.State()
+    db_name_list = gr.State()
+    db_name_chat = gr.State()
+    db_name_new = gr.State()
+    modify_db_old_name = gr.State()
+    delete_db_dropdown = gr.State()
+    with gr.Tab("Chatbot"):
+        with gr.Row():
+            with gr.Column(scale=2):
+                db_name_chat = gr.Dropdown(choices=databases, label="Seleziona Database", value="default_db")
+                # Aggiornato il tipo del chatbot
+                chatbot = gr.Chatbot(label="Conversazione", type="messages")
+                with gr.Row():
+                    # Aggiunta upload file direttamente nella chat
+                    file_input = gr.File(
+                        label="Carica PDF per la conversazione",
+                        file_types=[".pdf", ".docx", ".txt"],
+                        file_count="multiple"
+                    )
+                    upload_button = gr.Button("Carica Documenti")
+                question_input = gr.Textbox(
+                    label="Fai una domanda",
+                    placeholder="Scrivi qui la tua domanda...",
+                    lines=2
+                )
+                with gr.Row():
+                    ask_button = gr.Button("Invia")
+                    clear_button = gr.Button("Pulisci Chat")
+        chat_state = gr.State([])
+        def chat_upload_and_respond(files, chat_history, db_name):
+            """Show the text of the uploaded files in the conversation.
+
+            Returns the chat history with two assistant messages appended: a
+            header and the extracted text. With no files the history is
+            returned unchanged. ``db_name`` is accepted but not used.
+            """
+            if not files:
+                return chat_history
+            # Extract the text from the files
+            text = extract_text_from_files(files)
+            # The chatbot is declared with type="messages", so entries must be
+            # role/content dicts rather than (user, bot) tuples.
+            chat_history.append({"role": "assistant", "content": "📄 Contenuto dei documenti caricati:"})
+            chat_history.append({"role": "assistant", "content": text})
+            return chat_history
+        def respond(message, chat_history, db_name):
+            """Answer ``message`` and append the exchange to the chat history.
+
+            Returns an empty string (to clear the input box) and the updated
+            history.
+            """
+            # answer_question already returns the [user, assistant] pair as
+            # role/content dicts, so extend the history with it directly.
+            chat_history.extend(answer_question(message, db_name))
+            return "", chat_history
+        def clear_chat():
+            """Return two empty lists: one for the chatbot, one for the stored history."""
+            empty_chat, empty_state = [], []
+            return empty_chat, empty_state
+        # Eventi
+        upload_button.click(
+            chat_upload_and_respond,
+            inputs=[file_input, chat_state, db_name_chat],
+            outputs=[chatbot]
+        )
+        ask_button.click(
+            respond,
+            inputs=[question_input, chat_state, db_name_chat],
+            outputs=[question_input, chatbot]
+        )
+        clear_button.click(
+            clear_chat,
+            outputs=[chatbot, chat_state]
+        )
+    with gr.Tab("Gestione Database"):
+        gr.Markdown("## Operazioni sui Database")
+        with gr.Row():
+            with gr.Column():
+                gr.Markdown("### Crea Database")
+                db_name_input = gr.Textbox(label="Nome Nuovo Database")
+                create_db_button = gr.Button("Crea Database")
+                create_output = gr.Textbox(label="Stato Creazione")
+            with gr.Column():
+                gr.Markdown("### Rinomina Database")
+                modify_db_old_name = gr.Dropdown(choices=databases, label="Database da Rinominare")
+                modify_db_new_name = gr.Textbox(label="Nuovo Nome")
+                modify_db_button = gr.Button("Rinomina Database")
+                modify_output = gr.Textbox(label="Stato Modifica")
+            with gr.Column():
+                gr.Markdown("### Elimina Database")
+                delete_db_dropdown = gr.Dropdown(choices=databases, label="Database da Eliminare")
+                delete_db_button = gr.Button("Elimina Database")
+                delete_output = gr.Textbox(label="Stato Eliminazione")
+        # Eventi per i pulsanti di gestione database
+        create_db_button.click(
+            create_database,
+            inputs=db_name_input,
+            outputs=create_output
+        ).then(
+            update_dropdowns,
+            outputs=[db_name_upload, db_name_list, db_name_chat, db_name_new, modify_db_old_name, delete_db_dropdown]
+        )
+        modify_db_button.click(
+            modify_database,
+            inputs=[modify_db_old_name, modify_db_new_name],
+            outputs=modify_output
+        ).then(
+            update_dropdowns,
+            outputs=[db_name_upload, db_name_list, db_name_chat, db_name_new, modify_db_old_name, delete_db_dropdown]
+        )
+        delete_db_button.click(
+            delete_database,
+            inputs=delete_db_dropdown,
+            outputs=delete_output
+        ).then(
+            update_dropdowns,
+            outputs=[db_name_upload, db_name_list, db_name_chat, db_name_new, modify_db_old_name, delete_db_dropdown]
+        )
+    with gr.Tab("Gestione Documenti"):
+        with gr.Column():
+            gr.Markdown("### Carica Documenti")
+            with gr.Row():
+                file_input = gr.File(
+                    label="Carica i tuoi documenti",
+                    file_types=[".txt", ".pdf", ".docx"],
+                    file_count="multiple"
+                )
+                db_name_upload = gr.Dropdown(
+                    choices=databases,
+                    label="Seleziona Database",
+                    value="default_db"
+                )
+            with gr.Row():
+                title_input = gr.Textbox(label="Titolo del documento")
+                author_input = gr.Textbox(label="Autore")
+            upload_button = gr.Button("Indicizza Documenti")
+            upload_output = gr.Textbox(label="Stato Upload")
+            with gr.Column():
+                gr.Markdown("### Documenti nel Database")
+                db_name_list = gr.Dropdown(
+                    choices=databases,
+                    label="Seleziona Database",
+                    value="default_db"
+                )
+                list_button = gr.Button("Visualizza Files")
+                list_output = gr.Textbox(label="Files nel Database")
+                delete_file_input = gr.Textbox(label="Nome file da eliminare")
+                delete_file_button = gr.Button("Elimina File")
+                delete_file_output = gr.Textbox(label="Stato Eliminazione")
+        # Eventi modificati
+        upload_button.click(
+            upload_and_index,
+            inputs=[file_input, title_input, author_input, db_name_upload],
+            outputs=upload_output
+        ).then(
+            list_indexed_files,
+            inputs=db_name_list,
+            outputs=list_output
+        )
+        list_button.click(
+            list_indexed_files,
+            inputs=db_name_list,
+            outputs=list_output
+        )
+        delete_file_button.click(
+            delete_file_from_database,
+            inputs=[delete_file_input, db_name_list],
+            outputs=delete_file_output
+        ).then(
+            list_indexed_files,
+            inputs=db_name_list,
+            outputs=list_output
+        ).then(
+            update_dropdowns,
+            outputs=[db_name_upload, db_name_list, db_name_chat, db_name_new, modify_db_old_name, delete_db_dropdown]
+        )
+    with gr.Tab("Visualizza Documenti Indicizzati"):
+        with gr.Column():
+            gr.Markdown("### Documenti nel Database")
+            db_name_list = gr.Dropdown(
+                choices=databases,
+                label="Seleziona Database",
+                value="default_db",
+                interactive=True
+            )
+            list_button = gr.Button("Visualizza Documenti")
+            list_output = gr.Textbox(
+                label="Elenco Documenti",
+                lines=10,
+                interactive=False,
+                value="Clicca 'Visualizza Documenti' per vedere l'elenco"
+            )
+            # Evento click con aggiornamento
+            list_button.click(
+                fn=list_indexed_documents,
+                inputs=[db_name_list],
+                outputs=[list_output],
+                api_name="list_docs"
+            )
+    # Adding a new tab for new functionalities
+    with gr.Tab("Nuove Funzionalità"):
+        gr.Markdown("## Cerca Documenti e Genera Riassunto")
+        db_name_new = gr.Dropdown(choices=databases, label="Seleziona Database", value="default_db")
+        search_input = gr.Textbox(label="Inserisci Termine di Ricerca")
+        search_button = gr.Button("Cerca Documenti")
+        search_output = gr.Textbox(label="Documenti Trovati")
+        summary_button = gr.Button("Genera Riassunto")
+        summary_output = gr.Textbox(label="Riassunto")
+        search_button.click(
+            search_documents,
+            inputs=[search_input, db_name_new],
+            outputs=search_output
+        )
+        # summary_button.click(
+        #     generate_summary,
+        #     inputs=db_name_new,
+        #     outputs=summary_output
+        # )
+# Avvio dell'app
+if __name__ == "__main__":
+    rag_chatbot.launch()

faiss_index_Daniele2/index.faiss DELETED Viewed

Binary file (309 kB)

faiss_index_E-learning/index.faiss ADDED Viewed

Binary file (66.1 kB). View file

{faiss_index_Daniele2 → faiss_index_E-learning}/index.pkl RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0106133e5dce16eac6520cb2154cf87e768a4e6ecc950a38b3df93088ed511bf
-size 107706

 version https://git-lfs.github.com/spec/v1
+oid sha256:a0ec4d3c22f17861b941c079acdf82d250fdafd351e9b05ab3877110a3bbdade
+size 25352

faiss_index_E-learning/metadata.json ADDED Viewed

	@@ -0,0 +1,16 @@

+[
+  {
+    "filename": "istruzioni obiettivi di apprendimento.pdf",
+    "title": "Obiettivi",
+    "author": "Daniele",
+    "upload_date": "2024-12-31 19:21:10",
+    "chunks": 6
+  },
+  {
+    "filename": "mastery_Bloom.pdf",
+    "title": "Mastery Learning",
+    "author": "Bloom",
+    "upload_date": "2024-12-31 20:25:00",
+    "chunks": 43
+  }
+]

requirements.txt CHANGED Viewed

@@ -4,98 +4,125 @@ aiohttp==3.11.11
 aiosignal==1.3.2
 annotated-types==0.7.0
 anyio==4.7.0
 async-timeout==4.0.3
 attrs==24.3.0
 certifi==2024.12.14
 charset-normalizer==3.4.1
 click==8.1.8
-dataclasses-json==0.6.7
 distro==1.9.0
 exceptiongroup==1.2.2
-faiss-cpu==1.9.0.post1
 fastapi==0.115.6
 ffmpy==0.5.0
 filelock==3.16.1
 frozenlist==1.5.0
 fsspec==2024.12.0
 gradio==5.9.1
 gradio_client==1.5.2
 h11==0.14.0
 httpcore==1.0.7
 httpx==0.28.1
-httpx-sse==0.4.0
 huggingface-hub==0.27.0
 idna==3.10
 Jinja2==3.1.5
 jiter==0.8.2
-joblib==1.4.2
 jsonpatch==1.33
 jsonpointer==3.0.0
 langchain==0.3.13
-langchain-community==0.3.13
 langchain-core==0.3.28
-langchain-huggingface==0.1.2
-langchain-openai==0.2.14
 langchain-text-splitters==0.3.4
 langsmith==0.2.7
 lxml==5.3.0
 markdown-it-py==3.0.0
 MarkupSafe==2.1.5
-marshmallow==3.23.2
 mdurl==0.1.2
 mpmath==1.3.0
 multidict==6.1.0
-mypy-extensions==1.0.0
-networkx==3.4.2
 numpy==1.26.4
 openai==1.58.1
 orjson==3.10.13
 packaging==24.2
 pandas==2.2.3
 pillow==11.0.0
 propcache==0.2.1
 pydantic==2.10.4
-pydantic-settings==2.7.0
 pydantic_core==2.27.2
 pydub==0.25.1
 Pygments==2.18.0
-PyPDF2==3.0.1
 python-dateutil==2.9.0.post0
 python-docx==1.1.2
 python-dotenv==1.0.1
 python-multipart==0.0.20
 pytz==2024.2
 PyYAML==6.0.2
-regex==2024.11.6
 requests==2.32.3
 requests-toolbelt==1.0.0
 rich==13.9.4
 ruff==0.8.4
 safehttpx==0.1.6
-safetensors==0.4.5
-scikit-learn==1.6.0
-scipy==1.14.1
 semantic-version==2.10.0
-sentence-transformers==3.3.1
 shellingham==1.5.4
 six==1.17.0
 sniffio==1.3.1
 SQLAlchemy==2.0.36
 starlette==0.41.3
-sympy==1.13.1
 tenacity==9.0.0
-threadpoolctl==3.5.0
-tiktoken==0.8.0
 tokenizers==0.21.0
 tomlkit==0.13.2
-torch==2.5.1
 tqdm==4.67.1
-transformers==4.47.1
 typer==0.15.1
-typing-inspect==0.9.0
 typing_extensions==4.12.2
 tzdata==2024.2
 urllib3==2.3.0
 uvicorn==0.34.0
 websockets==14.1
 yarl==1.18.3

 aiosignal==1.3.2
 annotated-types==0.7.0
 anyio==4.7.0
+asgiref==3.8.1
 async-timeout==4.0.3
 attrs==24.3.0
+backoff==2.2.1
+bcrypt==4.2.1
+build==1.2.2.post1
+cachetools==5.5.0
 certifi==2024.12.14
 charset-normalizer==3.4.1
+chroma-hnswlib==0.7.6
+chromadb==0.6.0
 click==8.1.8
+coloredlogs==15.0.1
+Deprecated==1.2.15
 distro==1.9.0
+durationpy==0.9
 exceptiongroup==1.2.2
 fastapi==0.115.6
 ffmpy==0.5.0
 filelock==3.16.1
+flatbuffers==24.12.23
 frozenlist==1.5.0
 fsspec==2024.12.0
+google-auth==2.37.0
+googleapis-common-protos==1.66.0
 gradio==5.9.1
 gradio_client==1.5.2
+grpcio==1.68.1
 h11==0.14.0
 httpcore==1.0.7
+httptools==0.6.4
 httpx==0.28.1
 huggingface-hub==0.27.0
+humanfriendly==10.0
 idna==3.10
+importlib_metadata==8.5.0
+importlib_resources==6.4.5
 Jinja2==3.1.5
 jiter==0.8.2
 jsonpatch==1.33
 jsonpointer==3.0.0
+kubernetes==31.0.0
 langchain==0.3.13
 langchain-core==0.3.28
 langchain-text-splitters==0.3.4
 langsmith==0.2.7
 lxml==5.3.0
 markdown-it-py==3.0.0
 MarkupSafe==2.1.5
 mdurl==0.1.2
+mmh3==5.0.1
+monotonic==1.6
 mpmath==1.3.0
 multidict==6.1.0
 numpy==1.26.4
+oauthlib==3.2.2
+onnxruntime==1.20.1
 openai==1.58.1
+opentelemetry-api==1.29.0
+opentelemetry-exporter-otlp-proto-common==1.29.0
+opentelemetry-exporter-otlp-proto-grpc==1.29.0
+opentelemetry-instrumentation==0.50b0
+opentelemetry-instrumentation-asgi==0.50b0
+opentelemetry-instrumentation-fastapi==0.50b0
+opentelemetry-proto==1.29.0
+opentelemetry-sdk==1.29.0
+opentelemetry-semantic-conventions==0.50b0
+opentelemetry-util-http==0.50b0
 orjson==3.10.13
+overrides==7.7.0
 packaging==24.2
 pandas==2.2.3
 pillow==11.0.0
+posthog==3.7.4
 propcache==0.2.1
+protobuf==5.29.2
+pyasn1==0.6.1
+pyasn1_modules==0.4.1
 pydantic==2.10.4
 pydantic_core==2.27.2
 pydub==0.25.1
 Pygments==2.18.0
+pypdf==5.1.0
+PyPika==0.48.9
+pyproject_hooks==1.2.0
 python-dateutil==2.9.0.post0
 python-docx==1.1.2
 python-dotenv==1.0.1
 python-multipart==0.0.20
 pytz==2024.2
 PyYAML==6.0.2
 requests==2.32.3
+requests-oauthlib==2.0.0
 requests-toolbelt==1.0.0
 rich==13.9.4
+rsa==4.9
 ruff==0.8.4
 safehttpx==0.1.6
 semantic-version==2.10.0
 shellingham==1.5.4
 six==1.17.0
 sniffio==1.3.1
 SQLAlchemy==2.0.36
 starlette==0.41.3
+sympy==1.13.3
 tenacity==9.0.0
 tokenizers==0.21.0
+tomli==2.2.1
 tomlkit==0.13.2
 tqdm==4.67.1
 typer==0.15.1
 typing_extensions==4.12.2
 tzdata==2024.2
 urllib3==2.3.0
 uvicorn==0.34.0
+uvloop==0.21.0
+watchfiles==1.0.3
+websocket-client==1.8.0
 websockets==14.1
+wrapt==1.17.0
 yarl==1.18.3
+zipp==3.21.0