Nugh75 committed on
Commit
65416ea
·
1 Parent(s): d3b9bfa

updatate interfaccia chatbot

Browse files

bisogna aggiornare gli altri tab

.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -----BEGIN CERTIFICATE-----
2
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
3
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
4
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
5
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
6
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
7
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
8
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
9
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
10
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
11
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
12
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
13
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
14
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
15
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
16
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
17
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
18
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
19
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
20
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
21
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
22
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
23
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
24
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
25
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
26
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
27
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
28
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
29
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
30
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
31
+ -----END CERTIFICATE-----
app.py CHANGED
@@ -1,23 +1,43 @@
1
  import gradio as gr
2
- from app.document_handling import *
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  from app.llm_handling import answer_question
4
  from app.logging_config import configure_logging
5
 
6
  configure_logging()
7
 
8
  def update_dropdowns():
9
- """Aggiorna tutti i dropdown con la lista aggiornata dei database"""
10
  databases = list_databases()
 
11
  return [gr.update(choices=databases) for _ in range(6)]
12
 
13
  def extract_text_from_files(files):
 
14
  text = ""
15
  for file in files:
16
  try:
17
  if file.name.endswith('.pdf'):
18
- text += extract_text_from_pdf(file.name)
19
  elif file.name.endswith('.docx'):
20
- text += extract_text_from_docx(file.name)
21
  else:
22
  with open(file.name, 'r', encoding='utf-8') as f:
23
  text += f.read()
@@ -25,12 +45,14 @@ def extract_text_from_files(files):
25
  logging.error(f"Errore durante la lettura del file {file.name}: {e}")
26
  return text
27
 
 
28
  with gr.Blocks() as rag_chatbot:
29
  gr.Markdown("# Chatbot basato su RAG")
30
 
31
  databases = list_databases()
32
-
33
- # Definizione dei dropdown prima del loro utilizzo
 
34
  db_name_upload = gr.State()
35
  db_name_list = gr.State()
36
  db_name_chat = gr.State()
@@ -38,152 +60,260 @@ with gr.Blocks() as rag_chatbot:
38
  modify_db_old_name = gr.State()
39
  delete_db_dropdown = gr.State()
40
 
41
-
 
 
42
  with gr.Tab("Chatbot"):
43
  with gr.Row():
44
  with gr.Column(scale=2):
45
- db_name_chat = gr.Dropdown(choices=databases, label="Seleziona Database", value="default_db")
46
- chatbot = gr.Chatbot(label="Conversazione")
47
-
48
- with gr.Row():
49
- # Aggiunta upload file direttamente nella chat
50
- file_input = gr.File(
51
- label="Carica PDF per la conversazione",
52
- file_types=[".pdf", ".docx", ".txt"],
53
- file_count="multiple"
54
- )
55
- upload_button = gr.Button("Carica Documenti")
56
 
 
57
  question_input = gr.Textbox(
58
  label="Fai una domanda",
59
  placeholder="Scrivi qui la tua domanda...",
60
  lines=2
61
  )
62
-
63
  with gr.Row():
64
  ask_button = gr.Button("Invia")
65
- clear_button = gr.Button("Pulisci Chat")
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
 
67
  chat_state = gr.State([])
68
 
 
 
 
69
  def chat_upload_and_respond(files, chat_history, db_name):
 
 
 
 
70
  # Estrai il testo dai file
71
  text = extract_text_from_files(files)
72
 
73
- # Aggiungi il testo alla chat come messaggio dell'utente
74
- chat_history.append((None, "📄 Contenuto dei documenti caricati:"))
75
- chat_history.append((None, text))
 
 
76
 
77
  return chat_history
78
 
79
  def respond(message, chat_history, db_name):
80
- bot_message = answer_question(message, db_name)
81
- chat_history.append((message, bot_message))
 
 
 
 
 
 
 
 
82
  return "", chat_history
83
 
84
  def clear_chat():
 
85
  return [], []
86
 
87
- # Eventi
 
 
88
  upload_button.click(
89
- chat_upload_and_respond,
90
  inputs=[file_input, chat_state, db_name_chat],
91
- outputs=[chatbot]
92
  )
93
 
94
  ask_button.click(
95
- respond,
96
  inputs=[question_input, chat_state, db_name_chat],
97
  outputs=[question_input, chatbot]
98
  )
99
 
100
  clear_button.click(
101
- clear_chat,
102
  outputs=[chatbot, chat_state]
103
  )
104
 
105
- with gr.Tab("Creazione Database"):
106
- db_name_input = gr.Textbox(label="Nome Nuovo Database")
107
- create_db_button = gr.Button("Crea Database")
108
- create_output = gr.Textbox(label="Stato Creazione")
 
 
109
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  create_db_button.click(
111
- create_database,
112
- inputs=db_name_input,
113
- outputs=create_output
114
  ).then(
115
  update_dropdowns,
116
- outputs=[
117
- db_name_upload,
118
- db_name_list,
119
- db_name_chat,
120
- db_name_new,
121
- modify_db_old_name,
122
- delete_db_dropdown
123
- ]
124
  )
125
 
126
- with gr.Tab("Modifica Database"):
127
- modify_db_old_name = gr.Dropdown(choices=databases, label="Database da Rinominare")
128
- modify_db_new_name = gr.Textbox(label="Nuovo Nome")
129
- modify_db_button = gr.Button("Rinomina Database")
130
- modify_output = gr.Textbox(label="Stato Modifica")
131
-
132
  modify_db_button.click(
133
- modify_database,
134
- inputs=[modify_db_old_name, modify_db_new_name],
135
  outputs=modify_output
136
  ).then(
137
  update_dropdowns,
138
- outputs=[
139
- db_name_upload,
140
- db_name_list,
141
- db_name_chat,
142
- db_name_new,
143
- modify_db_old_name,
144
- delete_db_dropdown
145
- ]
146
  )
147
 
148
- with gr.Tab("Eliminazione Database"):
149
- delete_db_dropdown = gr.Dropdown(choices=databases, label="Database da Eliminare")
150
- delete_db_button = gr.Button("Elimina Database")
151
- delete_output = gr.Textbox(label="Stato Eliminazione")
152
-
153
  delete_db_button.click(
154
- delete_database,
155
- inputs=delete_db_dropdown,
156
  outputs=delete_output
157
  ).then(
158
  update_dropdowns,
159
- outputs=[
160
- db_name_upload,
161
- db_name_list,
162
- db_name_chat,
163
- db_name_new,
164
- modify_db_old_name,
165
- delete_db_dropdown
166
- ]
167
  )
168
 
169
- with gr.Tab("Carica Documenti"):
170
- file_input = gr.File(label="Carica i tuoi documenti", file_types=[".txt", ".pdf", ".docx"], file_count="multiple")
171
- db_name_upload = gr.Dropdown(choices=databases, label="Seleziona o Crea Database", value="default_db")
172
- upload_button = gr.Button("Indicizza Documenti")
173
- upload_output = gr.Textbox(label="Stato")
174
 
175
- upload_button.click(upload_and_index, inputs=[file_input, db_name_upload], outputs=upload_output)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176
 
177
- with gr.Tab("Visualizza Documenti Indicizzati"):
178
- db_name_list = gr.Dropdown(choices=databases, label="Seleziona Database", value="default_db")
179
- list_button = gr.Button("Visualizza Documenti")
180
- list_output = gr.Textbox(label="Elenco Documenti")
 
 
 
 
 
 
181
 
182
- list_button.click(list_indexed_documents, inputs=db_name_list, outputs=list_output)
 
 
 
 
183
 
184
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
 
186
- # Adding a new tab for new functionalities
 
 
187
  with gr.Tab("Nuove Funzionalità"):
188
  gr.Markdown("## Cerca Documenti e Genera Riassunto")
189
 
@@ -200,13 +330,13 @@ with gr.Blocks() as rag_chatbot:
200
  inputs=[search_input, db_name_new],
201
  outputs=search_output
202
  )
203
-
204
  # summary_button.click(
205
  # generate_summary,
206
  # inputs=db_name_new,
207
  # outputs=summary_output
208
  # )
209
 
210
- # Avvio dell'app su Hugging Face
211
  if __name__ == "__main__":
212
- rag_chatbot.launch(share=True)
 
1
  import gradio as gr
2
+ import logging
3
+
4
+ # Ipotizziamo che tu abbia queste funzioni nel tuo progetto:
5
+ # - list_databases(), create_database(), modify_database(), delete_database()...
6
+ # - list_indexed_files(), upload_and_index(), delete_file_from_database(), etc.
7
+ # - search_documents(), list_indexed_documents()...
8
+ #
9
+ # Se hanno nomi o posizioni diverse, adatta gli import di conseguenza
10
+ from app.document_handling import (
11
+ list_databases,
12
+ create_database,
13
+ modify_database,
14
+ delete_database,
15
+ upload_and_index,
16
+ list_indexed_files,
17
+ delete_file_from_database,
18
+ list_indexed_documents,
19
+ search_documents,
20
+ )
21
  from app.llm_handling import answer_question
22
  from app.logging_config import configure_logging
23
 
24
  configure_logging()
25
 
26
  def update_dropdowns():
27
+ """Aggiorna tutti i dropdown con la lista aggiornata dei database."""
28
  databases = list_databases()
29
+ # Ritorniamo 6 update() perché nel codice ci sono 6 dropdown da sincronizzare
30
  return [gr.update(choices=databases) for _ in range(6)]
31
 
32
  def extract_text_from_files(files):
33
+ """Estrae e concatena il testo da PDF, DOCX e TXT."""
34
  text = ""
35
  for file in files:
36
  try:
37
  if file.name.endswith('.pdf'):
38
+ text += extract_text_from_pdf(file.name) # Definita in document_handling
39
  elif file.name.endswith('.docx'):
40
+ text += extract_text_from_docx(file.name) # Definita in document_handling
41
  else:
42
  with open(file.name, 'r', encoding='utf-8') as f:
43
  text += f.read()
 
45
  logging.error(f"Errore durante la lettura del file {file.name}: {e}")
46
  return text
47
 
48
+
49
  with gr.Blocks() as rag_chatbot:
50
  gr.Markdown("# Chatbot basato su RAG")
51
 
52
  databases = list_databases()
53
+
54
+ # Questi State() servono per la gestione dei dropdown.
55
+ # Se non ti servono come stati separati, puoi anche rimuoverli.
56
  db_name_upload = gr.State()
57
  db_name_list = gr.State()
58
  db_name_chat = gr.State()
 
60
  modify_db_old_name = gr.State()
61
  delete_db_dropdown = gr.State()
62
 
63
+ # =============================================
64
+ # TAB: Chatbot
65
+ # =============================================
66
  with gr.Tab("Chatbot"):
67
  with gr.Row():
68
  with gr.Column(scale=2):
69
+ # Dropdown per selezionare il DB
70
+ db_name_chat = gr.Dropdown(
71
+ choices=databases,
72
+ label="Seleziona Database",
73
+ value="default_db"
74
+ )
75
+
76
+ # Chatbot component
77
+ chatbot = gr.Chatbot(label="Conversazione", type="messages")
 
 
78
 
79
+ # Input domanda
80
  question_input = gr.Textbox(
81
  label="Fai una domanda",
82
  placeholder="Scrivi qui la tua domanda...",
83
  lines=2
84
  )
85
+ # Bottoni azione
86
  with gr.Row():
87
  ask_button = gr.Button("Invia")
88
+ clear_button = gr.Button("Pulisci Chat")
89
+
90
+ # File upload con dimensioni ridotte
91
+ with gr.Row():
92
+ file_input = gr.File(
93
+ label="Carica PDF/Docx/TXT per la conversazione",
94
+ file_types=[".pdf", ".docx", ".txt"],
95
+ file_count="multiple",
96
+ height="100px", # Altezza ridotta
97
+ scale=3 # Riduce la larghezza relativa
98
+ )
99
+ upload_button = gr.Button("Carica Documenti", scale=1)
100
+
101
+
102
 
103
+ # Stato chat
104
  chat_state = gr.State([])
105
 
106
+ # ----------------------
107
+ # FUNZIONI DI CALLBACK
108
+ # ----------------------
109
  def chat_upload_and_respond(files, chat_history, db_name):
110
+ # Se chat_history è None, inizializziamo
111
+ if chat_history is None:
112
+ chat_history = []
113
+
114
  # Estrai il testo dai file
115
  text = extract_text_from_files(files)
116
 
117
+ # Aggiungo un messaggio "assistant" che mostra il testo caricato
118
+ chat_history.append({
119
+ "role": "assistant",
120
+ "content": f"📄 Contenuto dei documenti caricati:\n{text}"
121
+ })
122
 
123
  return chat_history
124
 
125
  def respond(message, chat_history, db_name):
126
+ if chat_history is None:
127
+ chat_history = []
128
+
129
+ # `answer_question` restituisce due messaggi (user + assistant) in lista
130
+ new_messages = answer_question(message, db_name)
131
+
132
+ # Li aggiungiamo in coda alla history
133
+ chat_history.extend(new_messages)
134
+
135
+ # Ritorniamo l'input svuotato (per pulire il Textbox) e la nuova history
136
  return "", chat_history
137
 
138
  def clear_chat():
139
+ # Svuota la chat
140
  return [], []
141
 
142
+ # ------------------
143
+ # EVENTI BOTTONE
144
+ # ------------------
145
  upload_button.click(
146
+ fn=chat_upload_and_respond,
147
  inputs=[file_input, chat_state, db_name_chat],
148
+ outputs=chatbot
149
  )
150
 
151
  ask_button.click(
152
+ fn=respond,
153
  inputs=[question_input, chat_state, db_name_chat],
154
  outputs=[question_input, chatbot]
155
  )
156
 
157
  clear_button.click(
158
+ fn=clear_chat,
159
  outputs=[chatbot, chat_state]
160
  )
161
 
162
+
163
+ # =============================================
164
+ # TAB: Gestione Database
165
+ # =============================================
166
+ with gr.Tab("Gestione Database"):
167
+ gr.Markdown("## Operazioni sui Database")
168
 
169
+ with gr.Row():
170
+ with gr.Column():
171
+ gr.Markdown("### Crea Database")
172
+ db_name_input = gr.Textbox(label="Nome Nuovo Database")
173
+ create_db_button = gr.Button("Crea Database")
174
+ create_output = gr.Textbox(label="Stato Creazione")
175
+
176
+ with gr.Column():
177
+ gr.Markdown("### Rinomina Database")
178
+ modify_db_old_name = gr.Dropdown(choices=databases, label="Database da Rinominare")
179
+ modify_db_new_name = gr.Textbox(label="Nuovo Nome")
180
+ modify_db_button = gr.Button("Rinomina Database")
181
+ modify_output = gr.Textbox(label="Stato Modifica")
182
+
183
+ with gr.Column():
184
+ gr.Markdown("### Elimina Database")
185
+ delete_db_dropdown = gr.Dropdown(choices=databases, label="Database da Eliminare")
186
+ delete_db_button = gr.Button("Elimina Database")
187
+ delete_output = gr.Textbox(label="Stato Eliminazione")
188
+
189
+ # Eventi per i pulsanti di gestione DB
190
  create_db_button.click(
191
+ create_database, # funzione
192
+ inputs=db_name_input, # input
193
+ outputs=create_output # output
194
  ).then(
195
  update_dropdowns,
196
+ outputs=[db_name_upload, db_name_list, db_name_chat, db_name_new, modify_db_old_name, delete_db_dropdown]
 
 
 
 
 
 
 
197
  )
198
 
 
 
 
 
 
 
199
  modify_db_button.click(
200
+ modify_database,
201
+ inputs=[modify_db_old_name, modify_db_new_name],
202
  outputs=modify_output
203
  ).then(
204
  update_dropdowns,
205
+ outputs=[db_name_upload, db_name_list, db_name_chat, db_name_new, modify_db_old_name, delete_db_dropdown]
 
 
 
 
 
 
 
206
  )
207
 
 
 
 
 
 
208
  delete_db_button.click(
209
+ delete_database,
210
+ inputs=delete_db_dropdown,
211
  outputs=delete_output
212
  ).then(
213
  update_dropdowns,
214
+ outputs=[db_name_upload, db_name_list, db_name_chat, db_name_new, modify_db_old_name, delete_db_dropdown]
 
 
 
 
 
 
 
215
  )
216
 
 
 
 
 
 
217
 
218
+ # =============================================
219
+ # TAB: Gestione Documenti
220
+ # =============================================
221
+ with gr.Tab("Gestione Documenti"):
222
+ with gr.Column():
223
+ gr.Markdown("### Carica Documenti")
224
+ with gr.Row():
225
+ file_input = gr.File(
226
+ label="Carica i tuoi documenti",
227
+ file_types=[".txt", ".pdf", ".docx"],
228
+ file_count="multiple"
229
+ )
230
+ db_name_upload = gr.Dropdown(
231
+ choices=databases,
232
+ label="Seleziona Database",
233
+ value="default_db"
234
+ )
235
+
236
+ with gr.Row():
237
+ title_input = gr.Textbox(label="Titolo del documento")
238
+ author_input = gr.Textbox(label="Autore")
239
+
240
+ upload_button = gr.Button("Indicizza Documenti")
241
+ upload_output = gr.Textbox(label="Stato Upload")
242
+
243
+ with gr.Column():
244
+ gr.Markdown("### Documenti nel Database")
245
+ db_name_list = gr.Dropdown(
246
+ choices=databases,
247
+ label="Seleziona Database",
248
+ value="default_db"
249
+ )
250
+ list_button = gr.Button("Visualizza Files")
251
+ list_output = gr.Textbox(label="Files nel Database")
252
+ delete_file_input = gr.Textbox(label="Nome file da eliminare")
253
+ delete_file_button = gr.Button("Elimina File")
254
+ delete_file_output = gr.Textbox(label="Stato Eliminazione")
255
 
256
+ # Eventi
257
+ upload_button.click(
258
+ upload_and_index,
259
+ inputs=[file_input, title_input, author_input, db_name_upload],
260
+ outputs=upload_output
261
+ ).then(
262
+ list_indexed_files,
263
+ inputs=db_name_list,
264
+ outputs=list_output
265
+ )
266
 
267
+ list_button.click(
268
+ list_indexed_files,
269
+ inputs=db_name_list,
270
+ outputs=list_output
271
+ )
272
 
273
+ delete_file_button.click(
274
+ delete_file_from_database,
275
+ inputs=[delete_file_input, db_name_list],
276
+ outputs=delete_file_output
277
+ ).then(
278
+ list_indexed_files,
279
+ inputs=db_name_list,
280
+ outputs=list_output
281
+ ).then(
282
+ update_dropdowns,
283
+ outputs=[db_name_upload, db_name_list, db_name_chat, db_name_new, modify_db_old_name, delete_db_dropdown]
284
+ )
285
+
286
+
287
+ # =============================================
288
+ # TAB: Visualizza Documenti Indicizzati
289
+ # =============================================
290
+ with gr.Tab("Visualizza Documenti Indicizzati"):
291
+ with gr.Column():
292
+ gr.Markdown("### Documenti nel Database")
293
+ db_name_list = gr.Dropdown(
294
+ choices=databases,
295
+ label="Seleziona Database",
296
+ value="default_db",
297
+ interactive=True
298
+ )
299
+ list_button = gr.Button("Visualizza Documenti")
300
+ list_output = gr.Textbox(
301
+ label="Elenco Documenti",
302
+ lines=10,
303
+ interactive=False,
304
+ value="Clicca 'Visualizza Documenti' per vedere l'elenco"
305
+ )
306
+
307
+ list_button.click(
308
+ fn=list_indexed_documents,
309
+ inputs=[db_name_list],
310
+ outputs=[list_output],
311
+ api_name="list_docs"
312
+ )
313
 
314
+ # =============================================
315
+ # TAB: Nuove Funzionalità
316
+ # =============================================
317
  with gr.Tab("Nuove Funzionalità"):
318
  gr.Markdown("## Cerca Documenti e Genera Riassunto")
319
 
 
330
  inputs=[search_input, db_name_new],
331
  outputs=search_output
332
  )
333
+ # Esempio di eventuale generazione riassunto
334
  # summary_button.click(
335
  # generate_summary,
336
  # inputs=db_name_new,
337
  # outputs=summary_output
338
  # )
339
 
340
+ # Avvio dell'app
341
  if __name__ == "__main__":
342
+ rag_chatbot.launch()
app/__pycache__/__init__.cpython-310.pyc CHANGED
Binary files a/app/__pycache__/__init__.cpython-310.pyc and b/app/__pycache__/__init__.cpython-310.pyc differ
 
app/__pycache__/config.cpython-310.pyc CHANGED
Binary files a/app/__pycache__/config.cpython-310.pyc and b/app/__pycache__/config.cpython-310.pyc differ
 
app/__pycache__/document_handling.cpython-310.pyc CHANGED
Binary files a/app/__pycache__/document_handling.cpython-310.pyc and b/app/__pycache__/document_handling.cpython-310.pyc differ
 
app/__pycache__/llm_handling.cpython-310.pyc CHANGED
Binary files a/app/__pycache__/llm_handling.cpython-310.pyc and b/app/__pycache__/llm_handling.cpython-310.pyc differ
 
app/__pycache__/logging_config.cpython-310.pyc CHANGED
Binary files a/app/__pycache__/logging_config.cpython-310.pyc and b/app/__pycache__/logging_config.cpython-310.pyc differ
 
app/config.py CHANGED
@@ -1,4 +1,10 @@
1
  import os
 
 
 
 
2
 
3
  # Configurazione del modello
4
- OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 
 
 
1
  import os
2
+ from dotenv import load_dotenv
3
+
4
+ # Carica le variabili d'ambiente dal file .env
5
+ load_dotenv()
6
 
7
  # Configurazione del modello
8
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
9
+ if not OPENAI_API_KEY:
10
+ raise ValueError("OPENAI_API_KEY non trovata. Verifica il file .env")
app/document_handling.py CHANGED
@@ -7,10 +7,45 @@ import shutil
7
  import PyPDF2
8
  from docx import Document
9
  from langchain.text_splitter import RecursiveCharacterTextSplitter
 
 
 
10
 
11
  # Initialize the text splitter
12
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  def extract_text_from_pdf(file_path):
15
  with open(file_path, 'rb') as f:
16
  reader = PyPDF2.PdfReader(f)
@@ -26,57 +61,27 @@ def extract_text_from_docx(file_path):
26
  text += para.text + "\n"
27
  return text
28
 
29
- def upload_and_index(files, db_name="default_db"):
30
- if not files:
31
- logging.warning("Nessun file fornito per l'indicizzazione.")
32
- return "Nessun file caricato."
33
-
34
- documents = []
35
- for file in files:
36
- try:
37
- if file.name.endswith('.pdf'):
38
- text = extract_text_from_pdf(file.name)
39
- elif file.name.endswith('.docx'):
40
- text = extract_text_from_docx(file.name)
41
- else:
42
- with open(file.name, 'r', encoding='utf-8') as f:
43
- text = f.read()
44
- # Split the text into chunks
45
- chunks = text_splitter.split_text(text)
46
- documents.extend(chunks)
47
- except Exception as e:
48
- logging.error(f"Errore durante la lettura del file {file.name}: {e}")
49
- continue
50
-
51
- # Creazione dell'indice con FAISS e Hugging Face embeddings
52
- embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
53
- vectorstore = FAISS.from_texts(documents, embeddings)
54
- db_path = f"faiss_index_{db_name}"
55
- vectorstore.save_local(db_path)
56
- logging.info(f"Documenti indicizzati con successo nel database {db_name}.")
57
- return f"Documenti indicizzati con successo nel database {db_name}!"
58
-
59
- def list_indexed_documents(db_name="default_db"):
60
  db_path = f"faiss_index_{db_name}"
61
  if not os.path.exists(db_path):
62
  logging.warning(f"L'indice FAISS per il database {db_name} non esiste.")
63
- return "Nessun documento indicizzato."
64
-
65
- # Carica l'indice FAISS
66
  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
67
  vectorstore = FAISS.load_local(db_path, embeddings, allow_dangerous_deserialization=True)
 
 
 
 
 
 
 
 
 
 
68
 
69
- # Ottieni i documenti dall'indice
70
- documents = [doc.page_content for doc in vectorstore.docstore._dict.values()]
71
- num_chunks = len(documents)
72
-
73
- if not documents:
74
- return "Nessun documento trovato nell'indice."
75
-
76
- # Formatta la lista dei documenti
77
- document_list = "\n".join([f"{i+1}. {doc}" for i, doc in enumerate(documents)])
78
- return f"Documenti nel database {db_name} (Numero di chunk: {num_chunks}):\n{document_list}"
79
-
80
  def create_database(db_name):
81
  logging.info(f"Creating database: {db_name}")
82
  db_path = f"faiss_index_{db_name}"
@@ -132,11 +137,165 @@ def list_databases():
132
  except Exception as e:
133
  logging.error(f"Error listing databases: {e}")
134
  return []
 
 
 
 
 
 
 
 
135
 
136
- def generate_summary(db_name="default_db"):
137
- # Placeholder for summarization logic
138
- return "This is a summary of the documents in the database."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
140
  def search_documents(query, db_name="default_db"):
141
  db_path = f"faiss_index_{db_name}"
142
  if not os.path.exists(db_path):
@@ -154,4 +313,8 @@ def search_documents(query, db_name="default_db"):
154
 
155
  # Collect the document contents
156
  results = [doc.page_content for doc in docs]
157
- return "\n\n".join(results)
 
 
 
 
 
7
  import PyPDF2
8
  from docx import Document
9
  from langchain.text_splitter import RecursiveCharacterTextSplitter
10
+ from dataclasses import dataclass
11
+ import json
12
+ from datetime import datetime
13
 
14
  # Initialize the text splitter
15
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
16
 
17
+ # -------------- UTILITY FUNCTIONS --------------
18
@dataclass
class DocumentMetadata:
    """Per-file bookkeeping record persisted in a database's ``metadata.json``."""

    filename: str  # base name of the uploaded file
    title: str  # title supplied with the upload
    author: str  # author supplied with the upload
    upload_date: str  # timestamp string, already formatted by the caller
    chunks: int  # number of text chunks produced for the file

    def to_dict(self):
        """Return the record as a plain ``dict`` (field name -> value).

        The mapping is derived from the dataclass fields so that adding a
        field never requires touching this method.
        """
        # Local import: the module header only imports ``dataclass``.
        from dataclasses import asdict

        return asdict(self)
34
+
35
def save_metadata(metadata_list, db_name):
    """Append metadata records to ``faiss_index_<db_name>/metadata.json``.

    Args:
        metadata_list: Iterable of objects exposing ``to_dict()``; each
            result is appended to the JSON array stored on disk.
        db_name: Name of the database whose metadata file is updated.

    Raises:
        OSError: If the metadata file cannot be read or written.
        ValueError: If the existing metadata file is not valid JSON.
    """
    db_path = f"faiss_index_{db_name}"
    metadata_file = os.path.join(db_path, "metadata.json")

    # The directory may not exist yet when this helper is called on its own.
    os.makedirs(db_path, exist_ok=True)

    existing_metadata = []
    if os.path.exists(metadata_file):
        with open(metadata_file, "r", encoding="utf-8") as f:
            existing_metadata = json.load(f)

    existing_metadata.extend(m.to_dict() for m in metadata_list)

    # Serialize before opening for writing so a serialization error cannot
    # leave a truncated metadata file behind.
    payload = json.dumps(existing_metadata, indent=2)
    with open(metadata_file, "w", encoding="utf-8") as f:
        f.write(payload)
48
+
49
  def extract_text_from_pdf(file_path):
50
  with open(file_path, 'rb') as f:
51
  reader = PyPDF2.PdfReader(f)
 
61
  text += para.text + "\n"
62
  return text
63
 
64
+ # -------------- CHATBOT TAB FUNCTIONS --------------
65
+ def answer_question(question, db_name="default_db"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  db_path = f"faiss_index_{db_name}"
67
  if not os.path.exists(db_path):
68
  logging.warning(f"L'indice FAISS per il database {db_name} non esiste.")
69
+ return "Database non trovato."
70
+
 
71
  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
72
  vectorstore = FAISS.load_local(db_path, embeddings, allow_dangerous_deserialization=True)
73
+
74
+ # Perform a similarity search
75
+ docs = vectorstore.similarity_search(question)
76
+
77
+ if not docs:
78
+ return "Nessun documento corrispondente alla query."
79
+
80
+ # Collect the document contents
81
+ results = [doc.page_content for doc in docs]
82
+ return "\n\n".join(results)
83
 
84
+ # -------------- DATABASE MANAGEMENT TAB FUNCTIONS --------------
 
 
 
 
 
 
 
 
 
 
85
  def create_database(db_name):
86
  logging.info(f"Creating database: {db_name}")
87
  db_path = f"faiss_index_{db_name}"
 
137
  except Exception as e:
138
  logging.error(f"Error listing databases: {e}")
139
  return []
140
+
141
+ # -------------- DOCUMENT MANAGEMENT TAB FUNCTIONS --------------
142
def upload_and_index(files, title, author, db_name="default_db"):
    """Split uploaded files into chunks and add them to a FAISS database.

    Files that cannot be read are logged and skipped. New chunks are merged
    into the database's existing index when one is present, so earlier
    uploads stay searchable and remain consistent with ``metadata.json``.

    Args:
        files: Uploaded file objects; each must expose a ``name`` path.
        title: Title recorded for every file of this upload.
        author: Author recorded for every file of this upload.
        db_name: Target database; its index lives in ``faiss_index_<db_name>``.

    Returns:
        A status message for display in the UI.
    """
    if not files:
        return "Nessun file caricato."

    texts = []
    metadatas = []
    doc_metadata = []

    for file in files:
        try:
            # Match extensions case-insensitively (".PDF", ".Docx", ...).
            lowered = file.name.lower()
            if lowered.endswith('.pdf'):
                text = extract_text_from_pdf(file.name)
            elif lowered.endswith('.docx'):
                text = extract_text_from_docx(file.name)
            else:
                with open(file.name, 'r', encoding='utf-8') as f:
                    text = f.read()

            chunks = text_splitter.split_text(text)
        except Exception as e:
            logging.error(f"Errore durante la lettura del file {file.name}: {e}")
            continue

        source = os.path.basename(file.name)
        upload_date = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        total_chunks = len(chunks)

        # One bookkeeping record per file ...
        doc_metadata.append(
            DocumentMetadata(
                filename=source,
                title=title,
                author=author,
                upload_date=upload_date,
                chunks=total_chunks,
            )
        )

        # ... and one metadata dict per chunk, stored alongside its vector.
        for i, chunk in enumerate(chunks):
            texts.append(chunk)
            metadatas.append({
                "source": source,
                "title": title,
                "author": author,
                "chunk_index": i,
                "total_chunks": total_chunks,
                "upload_date": upload_date,
            })

    if not texts:
        return "Nessun documento processato."

    try:
        db_path = f"faiss_index_{db_name}"
        os.makedirs(db_path, exist_ok=True)

        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

        # "index.faiss" is the file written by ``save_local`` with its
        # default index name; its presence means the database already holds
        # vectors that must be kept rather than overwritten.
        if os.path.exists(os.path.join(db_path, "index.faiss")):
            # NOTE(security): load_local unpickles the stored docstore; only
            # load indexes created by this application.
            vectorstore = FAISS.load_local(db_path, embeddings, allow_dangerous_deserialization=True)
            vectorstore.add_texts(texts, metadatas=metadatas)
        else:
            vectorstore = FAISS.from_texts(texts, embeddings, metadatas=metadatas)
        vectorstore.save_local(db_path)

        # Persist the per-file records only after the index was saved.
        save_metadata(doc_metadata, db_name)

        return f"Documenti indicizzati con successo nel database {db_name}!"
    except Exception as e:
        logging.error(f"Errore durante l'indicizzazione: {e}")
        return f"Errore durante l'indicizzazione: {e}"
210
+
211
def list_indexed_files(db_name="default_db"):
    """Return a human-readable listing of the files recorded for a database.

    The listing is built from ``faiss_index_<db_name>/metadata.json``. A
    missing metadata file, an empty list and a read/format error each yield
    their own status message instead of raising.
    """
    manifest = os.path.join(f"faiss_index_{db_name}", "metadata.json")

    if not os.path.exists(manifest):
        return "Nessun file nel database."

    def _describe(entry):
        # One multi-line paragraph per recorded file.
        return (
            f"📄 {entry['title']}\n"
            f" Autore: {entry['author']}\n"
            f" File: {entry['filename']}\n"
            f" Chunks: {entry['chunks']}\n"
            f" Caricato il: {entry['upload_date']}\n"
        )

    try:
        with open(manifest, 'r') as handle:
            entries = json.load(handle)

        paragraphs = [_describe(entry) for entry in entries]
        if not paragraphs:
            return "Nessun documento nel database."
        return "\n".join(paragraphs)
    except Exception as e:
        logging.error(f"Errore nella lettura dei metadati: {e}")
        return f"Errore nella lettura dei metadati: {e}"
236
+
237
def delete_file_from_database(file_name, db_name="default_db"):
    """Remove a file's bookkeeping entries from a database.

    Entries are removed from ``metadata.json`` (the file the upload path
    writes) and, when present, from a legacy ``file_list.txt``.

    NOTE: only the bookkeeping files are updated here; this function does
    not touch the FAISS index itself.

    Args:
        file_name: Base name of the file to remove.
        db_name: Database the file belongs to.

    Returns:
        A status message for display in the UI.
    """
    file_name = (file_name or "").strip()
    if not file_name:
        return "Nessun nome file specificato."

    db_path = f"faiss_index_{db_name}"
    metadata_file = os.path.join(db_path, "metadata.json")
    file_list_path = os.path.join(db_path, "file_list.txt")

    has_metadata = os.path.exists(metadata_file)
    has_file_list = os.path.exists(file_list_path)
    if not (has_metadata or has_file_list):
        return "Database non trovato."

    try:
        removed = False

        if has_metadata:
            with open(metadata_file, "r", encoding="utf-8") as f:
                metadata = json.load(f)
            kept = [doc for doc in metadata if doc.get("filename") != file_name]
            if len(kept) != len(metadata):
                # Serialize first so a failure cannot truncate the file.
                payload = json.dumps(kept, indent=2)
                with open(metadata_file, "w", encoding="utf-8") as f:
                    f.write(payload)
                removed = True

        if has_file_list:
            with open(file_list_path, "r") as f:
                names = [line.strip() for line in f]
            remaining = [name for name in names if name != file_name]
            if len(remaining) != len(names):
                with open(file_list_path, "w") as f:
                    f.writelines(f"{name}\n" for name in remaining)
                removed = True

        if not removed:
            return f"File {file_name} non trovato nel database {db_name}."
        return f"File {file_name} rimosso dal database {db_name}."
    except Exception as e:
        return f"Errore durante la rimozione del file: {e}"
260
 
261
+ # -------------- DOCUMENT VISUALIZATION TAB FUNCTIONS --------------
262
def list_indexed_documents(db_name="default_db"):
    """Return a formatted report of the documents recorded for a database.

    The report is read from ``faiss_index_<db_name>/metadata.json``. A
    missing database directory, a missing metadata file, an empty metadata
    list and a read/format error each produce a dedicated message.
    """
    index_dir = f"faiss_index_{db_name}"
    if not os.path.exists(index_dir):
        return f"Il database {db_name} non esiste."

    manifest = os.path.join(index_dir, "metadata.json")
    if not os.path.exists(manifest):
        return f"Nessun documento nel database {db_name}."

    try:
        with open(manifest, 'r') as handle:
            entries = json.load(handle)

        if not entries:
            return "Nessun documento trovato nel database."

        result = "📚 Documenti nel database:"
        for entry in entries:
            # Each document contributes five lines; the first one starts
            # with a newline so documents are separated by a blank line.
            block = (
                f"\n📄 Documento: {entry['title']}",
                f" 📝 Autore: {entry['author']}",
                f" 📁 File: {entry['filename']}",
                f" 🕒 Caricato il: {entry['upload_date']}",
                f" 📑 Chunks: {entry['chunks']}",
            )
            result += "\n" + "\n".join(block)

        logging.info(f"Documenti trovati nel database {db_name}: {result}")
        return result

    except Exception as e:
        error_msg = f"Errore nella lettura dei metadati: {e}"
        logging.error(error_msg)
        return error_msg
297
+
298
+ # -------------- NEW FEATURES TAB FUNCTIONS --------------
299
  def search_documents(query, db_name="default_db"):
300
  db_path = f"faiss_index_{db_name}"
301
  if not os.path.exists(db_path):
 
313
 
314
  # Collect the document contents
315
  results = [doc.page_content for doc in docs]
316
+ return "\n\n".join(results)
317
+
318
def generate_summary(db_name="default_db"):
    """Return a summary of a database's documents (placeholder).

    ``db_name`` is accepted but not used yet: the function currently
    returns a fixed string.
    """
    placeholder = "This is a summary of the documents in the database."
    return placeholder
app/import pytest.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytest
2
+ from unittest.mock import Mock, patch
3
+ import os
4
+ from .llm_handling import answer_question
5
+
6
+ # app/test_llm_handling.py
7
+
8
@pytest.fixture
def mock_embeddings():
    """Patch the embeddings class in the namespace where answer_question looks it up.

    The module under test does ``from ... import HuggingFaceEmbeddings``, so the
    name must be replaced in ``app.llm_handling``; patching the defining library
    would leave the already-imported reference untouched.
    """
    with patch('app.llm_handling.HuggingFaceEmbeddings') as mock:
        yield mock


@pytest.fixture
def mock_vectorstore():
    """Patch FAISS so load_local returns a store yielding one fake chunk."""
    with patch('app.llm_handling.FAISS') as mock:
        mock_instance = Mock()
        # page_content must be a real string: the chunks are joined into the prompt.
        mock_instance.similarity_search.return_value = [Mock(page_content="Test context")]
        mock.load_local.return_value = mock_instance
        yield mock


@pytest.fixture
def mock_chat_openai():
    """Patch the OpenAI client so the chat completion returns 'Test answer'."""
    with patch('app.llm_handling.OpenAI') as mock:
        response = Mock()
        response.choices = [Mock(message=Mock(content="Test answer"))]
        mock.return_value.chat.completions.create.return_value = response
        yield mock


def test_database_not_found(mock_embeddings):
    # Embeddings are mocked so the test never loads a real model.
    result = answer_question("test question", "nonexistent_db")
    assert len(result) == 2
    assert result[0]["role"] == "user"
    assert result[0]["content"] == "test question"
    assert result[1]["role"] == "assistant"
    assert result[1]["content"] == "Database non trovato"


@patch('os.path.exists', return_value=True)
def test_successful_answer(mock_exists, mock_embeddings, mock_vectorstore, mock_chat_openai):
    result = answer_question("test question", "test_db")

    assert len(result) == 2
    assert result[0]["role"] == "user"
    assert result[0]["content"] == "test question"
    assert result[1]["role"] == "assistant"
    assert result[1]["content"] == "Test answer"


@patch('os.path.exists', return_value=True)
def test_error_handling(mock_exists, mock_embeddings):
    # Instantiating the (patched) embeddings class raises inside answer_question.
    mock_embeddings.side_effect = Exception("Test error")

    result = answer_question("test question", "test_db")

    assert len(result) == 2
    assert result[0]["role"] == "user"
    assert result[0]["content"] == "test question"
    assert result[1]["role"] == "assistant"
    assert "Si è verificato un errore: Test error" in result[1]["content"]
app/llm_handling.py CHANGED
@@ -1,104 +1,118 @@
1
  import logging
2
- from langchain_openai import ChatOpenAI
 
 
 
3
  from langchain_community.vectorstores import FAISS
4
  from langchain_community.embeddings import HuggingFaceEmbeddings
5
- from langchain.chains import RetrievalQA
 
6
  from app.config import OPENAI_API_KEY
7
- import gradio as gr # Da aggiungere per le funzioni che usano gr.Dropdown.update()
8
- import os
9
- import shutil
10
 
11
  logging.basicConfig(level=logging.INFO)
12
 
13
- def answer_question(question, db_name, chat_history=[]):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  logging.info(f"Inizio elaborazione domanda: {question} per database: {db_name}")
15
 
16
  try:
17
- # 1. Carica il database FAISS
18
  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
19
  db_path = f"faiss_index_{db_name}"
20
 
21
  if not os.path.exists(db_path):
22
- return "Database non trovato. Seleziona un database valido."
23
-
24
- vectorstore = FAISS.load_local(db_path, embeddings, allow_dangerous_deserialization=True)
 
 
25
 
26
- # 2. Recupera più contesto rilevante
27
- docs = vectorstore.similarity_search(question, k=5) # Aumentato da 3 a 5
28
- context = "\n\n=== Documento ===\n".join([doc.page_content for doc in docs])
29
- logging.info(f"Contesto recuperato: {context}")
30
 
31
- # 3. Prompt migliorato per gestire più contenuti
32
- prompt = f"""Analizza i seguenti documenti e fornisci una risposta strutturata nel seguente formato.
33
- Mantieni un tono conversazionale e fai riferimento alle domande precedenti se pertinenti.
34
 
35
- Cronologia della conversazione:
36
- {chat_history}
37
-
38
- SINTESI DEI DOCUMENTI:
39
- - Elenca brevemente i principali argomenti trovati nei documenti (massimo 3 punti)
40
-
41
- ANALISI:
42
- - Numero di documenti analizzati: [numero]
43
- - Principali concetti rilevanti per la domanda
44
 
45
- RISPOSTA DETTAGLIATA:
46
- - Risposta completa e conversazionale basata sui documenti
47
 
48
- Documenti di riferimento:
49
- {context}
 
 
50
 
51
- Domanda: {question}
52
- """
53
-
54
- # 4. Inizializza l'LLM
55
- llm = ChatOpenAI(
56
- model_name="gpt-4o-mini", # Aggiornato a GPT-4 Turbo
57
- openai_api_key=OPENAI_API_KEY,
58
- temperature=0.7, # Aumentato per risposte più creative
59
- max_tokens=2000 # Aumentato per risposte più complete
60
  )
61
-
62
- # 5. Catena RAG ottimizzata con più contesto
63
- chain = RetrievalQA.from_chain_type(
64
- llm=llm,
65
- chain_type="stuff",
66
- retriever=vectorstore.as_retriever(
67
- search_kwargs={
68
- "k": 8, # Aumentato da 5 a 8
69
- "fetch_k": 15, # Aumentato da 10 a 15
70
- "score_threshold": 0.3 # Ridotto da 0.5 a 0.3 per includere più contesto
71
- }
72
- ),
73
- return_source_documents=True,
74
- verbose=True
75
- )
76
-
77
- # 6. Ottieni la risposta
78
- result = chain({"query": prompt})
79
- answer = result["result"]
80
 
81
- logging.info(f"Risposta generata: {answer}")
82
- return answer
 
 
 
 
83
 
84
  except Exception as e:
85
  logging.error(f"Errore durante la generazione della risposta: {e}")
86
- return f"Si è verificato un errore: {str(e)}"
 
 
 
 
87
 
88
- # Nel document_handling.py, aggiornare delete_database per restituire anche l'aggiornamento del dropdown
89
  def delete_database(db_name):
 
 
 
 
90
  db_path = f"faiss_index_{db_name}"
91
  if not os.path.exists(db_path):
92
- return f"Il database {db_name} non esiste.", gr.Dropdown.update(choices=list_databases())
93
  try:
94
  shutil.rmtree(db_path)
95
  logging.info(f"Database {db_name} eliminato con successo.")
96
- return f"Database {db_name} eliminato con successo.", gr.Dropdown.update(choices=list_databases())
 
97
  except OSError as e:
98
  logging.error(f"Impossibile eliminare il database {db_name}: {e}")
99
- return f"Impossibile eliminare il database {db_name}: {e}", gr.Dropdown.update(choices=list_databases())
 
100
 
101
- # Manca la chiamata a ensure_default_db()
102
  if __name__ == "__main__":
103
- ensure_default_db() # Aggiungere questa chiamata
104
- rag_chatbot.launch(share=True)
 
 
 
 
 
 
 
 
 
1
  import logging
2
+ import os
3
+ import shutil
4
+
5
+ from openai import OpenAI
6
  from langchain_community.vectorstores import FAISS
7
  from langchain_community.embeddings import HuggingFaceEmbeddings
8
+ import gradio as gr
9
+
10
  from app.config import OPENAI_API_KEY
11
+ # Se hai funzioni per gestire i database (list_databases, ensure_default_db, ecc.),
12
+ # importale dal modulo corretto:
13
+ # from app.document_handling import list_databases, ensure_default_db
14
 
15
  logging.basicConfig(level=logging.INFO)
16
 
17
def answer_question(question, db_name, chat_history=None):
    """Answer ``question`` using the documents stored in database ``db_name``.

    The three chunks most similar to the question are retrieved from the FAISS
    index ``faiss_index_<db_name>``, logged, and passed to the OpenAI chat
    completion API as system context.

    Args:
        question: The user's question.
        db_name: Name of the database whose index should be searched.
        chat_history: Accepted for interface compatibility; it is currently
            not used when building the prompt.

    Returns:
        A list of two messages::

            [
                {"role": "user", "content": <question>},
                {"role": "assistant", "content": <answer or error text>},
            ]

        Failures are never raised: they are reported in the assistant message.
    """
    logging.info("Inizio elaborazione domanda: %s per database: %s", question, db_name)

    try:
        db_path = f"faiss_index_{db_name}"

        # Check for the index first so a missing database is reported without
        # paying the cost of loading the embedding model.
        if not os.path.exists(db_path):
            logging.warning(f"Database {db_name} non trovato.")
            return [
                {"role": "user", "content": question},
                {"role": "assistant", "content": "Database non trovato"}
            ]

        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

        # Load the FAISS index.
        # NOTE(review): allow_dangerous_deserialization unpickles index.pkl;
        # only load indexes created by this application.
        vectorstore = FAISS.load_local(db_path, embeddings, allow_dangerous_deserialization=True)

        # Retrieve the most similar documents (chunks).
        relevant_docs = vectorstore.similarity_search(question, k=3)

        # Log the retrieved chunks.
        for idx, doc in enumerate(relevant_docs):
            logging.info(f"--- Chunk {idx+1} ---")
            logging.info(doc.page_content)
            logging.info("---------------------")

        # Build the context from the retrieved documents.
        context = "\n".join([doc.page_content for doc in relevant_docs])

        client = OpenAI(api_key=OPENAI_API_KEY)

        messages = [
            {"role": "system", "content": f"Usa questo contesto per rispondere: {context}"},
            {"role": "user", "content": question}
        ]

        # Call OpenAI.
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=messages,
            temperature=0,
            max_tokens=2048
        )

        answer = response.choices[0].message.content

        return [
            {"role": "user", "content": question},
            {"role": "assistant", "content": answer}
        ]

    except Exception as e:
        # UI boundary: keep the traceback in the log, show a message in the chat.
        logging.exception("Errore durante la generazione della risposta: %s", e)
        return [
            {"role": "user", "content": question},
            {"role": "assistant", "content": f"Si è verificato un errore: {str(e)}"}
        ]
88
+
89
 
 
90
def delete_database(db_name):
    """Delete the FAISS database folder that corresponds to ``db_name``.

    Args:
        db_name: Name of the database; its folder is ``faiss_index_<db_name>``.

    Returns:
        A ``(status_message, dropdown_update)`` tuple. The second element is a
        ``gr.update(...)`` payload for the database dropdown.
    """
    # gr.update() is used instead of gr.Dropdown.update(): the per-component
    # update() class methods were removed in Gradio 4 and are not available in
    # the Gradio 5 release this project pins.
    # NOTE(review): choices=[] empties the dropdown; if a list_databases()
    # helper is available, pass its result here instead.
    db_path = f"faiss_index_{db_name}"
    if not os.path.exists(db_path):
        return f"Il database {db_name} non esiste.", gr.update(choices=[])
    try:
        shutil.rmtree(db_path)
        logging.info(f"Database {db_name} eliminato con successo.")
        return f"Database {db_name} eliminato con successo.", gr.update(choices=[])
    except OSError as e:
        logging.error(f"Impossibile eliminare il database {db_name}: {e}")
        return f"Impossibile eliminare il database {db_name}: {e}", gr.update(choices=[])
106
+
107
 
 
108
if __name__ == "__main__":
    # If an ensure_default_db() function exists, uncomment:
    # ensure_default_db()

    # Here you could test answer_question or start
    # your Gradio server. For example:
    #
    # from app.interface import rag_chatbot
    # rag_chatbot.launch(share=True)

    pass
app/llm_handling_3.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ from openai import OpenAI
3
+ from langchain_community.vectorstores import FAISS
4
+ from langchain_community.embeddings import HuggingFaceEmbeddings
5
+ from app.config import OPENAI_API_KEY
6
+ import gradio as gr
7
+ import os
8
+ import shutil
9
+
10
+ logging.basicConfig(level=logging.INFO)
11
+
12
def answer_question(question, db_name, chat_history=None):
    """Answer ``question`` using the documents stored in database ``db_name``.

    The three chunks most similar to the question are retrieved from the FAISS
    index ``faiss_index_<db_name>`` and passed to the OpenAI chat completion
    API as system context.

    Args:
        question: The user's question.
        db_name: Name of the database whose index should be searched.
        chat_history: Accepted for interface compatibility; it is currently
            not used when building the prompt.

    Returns:
        A two-element list: the user message followed by the assistant
        message, each a ``{"role": ..., "content": ...}`` dict. Failures are
        never raised: they are reported in the assistant message.
    """
    logging.info("Inizio elaborazione domanda: %s per database: %s", question, db_name)

    try:
        db_path = f"faiss_index_{db_name}"

        # Check for the index first so a missing database is reported without
        # paying the cost of loading the embedding model.
        if not os.path.exists(db_path):
            return [{"role": "user", "content": question},
                    {"role": "assistant", "content": "Database non trovato"}]

        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

        # NOTE(review): allow_dangerous_deserialization unpickles index.pkl;
        # only load indexes created by this application.
        vectorstore = FAISS.load_local(db_path, embeddings, allow_dangerous_deserialization=True)
        relevant_docs = vectorstore.similarity_search(question, k=3)

        # Build the context from the retrieved documents.
        context = "\n".join([doc.page_content for doc in relevant_docs])

        client = OpenAI(api_key=OPENAI_API_KEY)

        messages = [
            {"role": "system", "content": f"Usa questo contesto per rispondere: {context}"},
            {"role": "user", "content": question}
        ]

        response = client.chat.completions.create(
            model="gpt-3.5-turbo",  # Changed from gpt-4o-mini to a supported model
            messages=messages,
            temperature=0,
            max_tokens=2048
        )

        answer = response.choices[0].message.content

        return [
            {"role": "user", "content": question},
            {"role": "assistant", "content": answer}
        ]

    except Exception as e:
        # UI boundary: keep the traceback in the log, show a message in the chat.
        logging.exception("Errore durante la generazione della risposta: %s", e)
        return [
            {"role": "user", "content": question},
            {"role": "assistant", "content": f"Si è verificato un errore: {str(e)}"}
        ]
59
+
60
+ # Nel document_handling.py, aggiornare delete_database per restituire anche l'aggiornamento del dropdown
61
def delete_database(db_name):
    """Delete the FAISS database folder for ``db_name`` and refresh the dropdown.

    Args:
        db_name: Name of the database; its folder is ``faiss_index_<db_name>``.

    Returns:
        A ``(status_message, dropdown_update)`` tuple, where the second element
        is a ``gr.update(...)`` payload carrying the current database list.
    """
    # list_databases was called here without ever being imported, which raised
    # NameError on every call. It is imported lazily to avoid an import cycle.
    # NOTE(review): assumes list_databases lives in app.document_handling
    # (app_1.py obtains it from there via a star import) — confirm.
    from app.document_handling import list_databases

    db_path = f"faiss_index_{db_name}"
    if not os.path.exists(db_path):
        message = f"Il database {db_name} non esiste."
    else:
        try:
            shutil.rmtree(db_path)
        except OSError as e:
            logging.error(f"Impossibile eliminare il database {db_name}: {e}")
            message = f"Impossibile eliminare il database {db_name}: {e}"
        else:
            logging.info(f"Database {db_name} eliminato con successo.")
            message = f"Database {db_name} eliminato con successo."

    # gr.update() replaces gr.Dropdown.update(), which was removed in Gradio 4.
    # The choices are read after the deletion so they reflect the new state.
    return message, gr.update(choices=list_databases())
72
+
73
+ # Manca la chiamata a ensure_default_db()
74
if __name__ == "__main__":
    # NOTE(review): neither ensure_default_db nor rag_chatbot is imported or
    # defined in this module, so running it as a script would raise NameError —
    # confirm where these names are meant to come from.
    ensure_default_db()  # Add this call
    rag_chatbot.launch(share=True)
app/test_llm_handling.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from unittest.mock import Mock, patch

from app.llm_handling import answer_question


def _fake_openai_client(answer):
    """Build a stand-in for OpenAI() whose chat completion returns ``answer``."""
    response = Mock()
    response.choices = [Mock(message=Mock(content=answer))]
    client = Mock()
    client.chat.completions.create.return_value = response
    return client


def test_database_not_found():
    # Embeddings are patched where answer_question looks them up, so no real
    # model is loaded while exercising the missing-database path.
    with patch("app.llm_handling.HuggingFaceEmbeddings"):
        result = answer_question("test question", "nonexistent_db")

    # answer_question returns [user_message, assistant_message] as dicts.
    assert len(result) == 2
    assert result[0] == {"role": "user", "content": "test question"}
    assert result[1] == {"role": "assistant", "content": "Database non trovato"}


@patch('os.path.exists', return_value=True)
def test_successful_answer(mock_exists):
    vectorstore = Mock()
    # page_content must be a real string: the chunks are joined into the prompt.
    vectorstore.similarity_search.return_value = [Mock(page_content="Test context")]

    with patch("app.llm_handling.HuggingFaceEmbeddings"), \
            patch("app.llm_handling.FAISS") as mock_faiss, \
            patch("app.llm_handling.OpenAI", return_value=_fake_openai_client("Test answer")):
        mock_faiss.load_local.return_value = vectorstore
        result = answer_question("test question", "test_db")

    assert len(result) == 2
    assert result[0] == {"role": "user", "content": "test question"}
    assert result[1] == {"role": "assistant", "content": "Test answer"}


@patch('os.path.exists', return_value=True)
def test_error_handling(mock_exists):
    with patch("app.llm_handling.HuggingFaceEmbeddings",
               side_effect=Exception("Test error")):
        result = answer_question("test question", "test_db")

    assert len(result) == 2
    assert result[0] == {"role": "user", "content": "test question"}
    assert result[1]["role"] == "assistant"
    assert "Si è verificato un errore: Test error" in result[1]["content"]
app_1.py ADDED
@@ -0,0 +1,275 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from app.document_handling import *
3
+ from app.llm_handling import answer_question
4
+ from app.logging_config import configure_logging
5
+
6
+ configure_logging()
7
+
8
def update_dropdowns():
    """Return six ``gr.update`` payloads, each carrying the current database list.

    The list is read once via ``list_databases()`` and reused for every payload;
    callers bind the six results to six output components.
    """
    current_databases = list_databases()
    refreshed = []
    for _ in range(6):
        refreshed.append(gr.update(choices=current_databases))
    return refreshed
12
+
13
def extract_text_from_files(files):
    """Concatenate the text extracted from the uploaded files.

    PDF and DOCX files are delegated to ``extract_text_from_pdf`` and
    ``extract_text_from_docx``; anything else is read as UTF-8 text. A file
    that cannot be read is logged and skipped so the others are still used.

    Args:
        files: Iterable of uploaded files, each either an object exposing the
            path as ``.name`` or a plain path string. ``None`` or an empty
            list (no upload) is accepted.

    Returns:
        The extracted texts joined in input order, or ``""`` when there is
        nothing to read.
    """
    if not files:
        # The upload widget delivers None when nothing was selected.
        return ""

    parts = []
    for file in files:
        # Accept both file-like upload objects and bare path strings.
        path = getattr(file, "name", file)
        try:
            lowered = path.lower()  # match .PDF / .Docx as well
            if lowered.endswith('.pdf'):
                parts.append(extract_text_from_pdf(path))
            elif lowered.endswith('.docx'):
                parts.append(extract_text_from_docx(path))
            else:
                with open(path, 'r', encoding='utf-8') as f:
                    parts.append(f.read())
        except Exception as e:
            # Best effort by design: one unreadable file must not lose the rest.
            logging.error(f"Errore durante la lettura del file {path}: {e}")
    return "".join(parts)
27
+
28
+ with gr.Blocks() as rag_chatbot:
29
+ gr.Markdown("# Chatbot basato su RAG")
30
+
31
+ databases = list_databases()
32
+
33
+ # Definizione dei dropdown prima del loro utilizzo
34
+ db_name_upload = gr.State()
35
+ db_name_list = gr.State()
36
+ db_name_chat = gr.State()
37
+ db_name_new = gr.State()
38
+ modify_db_old_name = gr.State()
39
+ delete_db_dropdown = gr.State()
40
+
41
+
42
+ with gr.Tab("Chatbot"):
43
+ with gr.Row():
44
+ with gr.Column(scale=2):
45
+ db_name_chat = gr.Dropdown(choices=databases, label="Seleziona Database", value="default_db")
46
+ # Aggiornato il tipo del chatbot
47
+ chatbot = gr.Chatbot(label="Conversazione", type="messages")
48
+
49
+ with gr.Row():
50
+ # Aggiunta upload file direttamente nella chat
51
+ file_input = gr.File(
52
+ label="Carica PDF per la conversazione",
53
+ file_types=[".pdf", ".docx", ".txt"],
54
+ file_count="multiple"
55
+ )
56
+ upload_button = gr.Button("Carica Documenti")
57
+
58
+ question_input = gr.Textbox(
59
+ label="Fai una domanda",
60
+ placeholder="Scrivi qui la tua domanda...",
61
+ lines=2
62
+ )
63
+
64
+ with gr.Row():
65
+ ask_button = gr.Button("Invia")
66
+ clear_button = gr.Button("Pulisci Chat")
67
+
68
+ chat_state = gr.State([])
69
+
70
def chat_upload_and_respond(files, chat_history, db_name):
    """Show the text of the uploaded files in the conversation.

    Args:
        files: Uploaded files, forwarded to ``extract_text_from_files``.
        chat_history: Conversation so far; it is extended in place.
        db_name: Selected database (currently unused by this handler).

    Returns:
        The updated ``chat_history``.
    """
    # Extract the text from the files.
    text = extract_text_from_files(files)

    # The chatbot is created with type="messages", so entries must be
    # {"role", "content"} dicts. The previous (None, text) tuples filled the
    # bot slot of the tuple format, hence the "assistant" role here.
    chat_history.append({"role": "assistant", "content": "📄 Contenuto dei documenti caricati:"})
    chat_history.append({"role": "assistant", "content": text})

    return chat_history
79
+
80
def respond(message, chat_history, db_name):
    """Answer ``message`` and add the exchange to the conversation.

    Args:
        message: The question typed by the user.
        chat_history: Conversation so far; it is extended in place.
        db_name: Database to query.

    Returns:
        ``("", chat_history)``: an empty string to clear the input box and
        the updated conversation.
    """
    # answer_question already returns the exchange as two message dicts
    # (user, assistant). Wrapping that list in a (message, reply) tuple
    # produced entries a type="messages" chatbot cannot render, so the two
    # messages are appended as they are.
    exchange = answer_question(message, db_name)
    chat_history.extend(exchange)
    return "", chat_history
84
+
85
def clear_chat():
    """Return two fresh empty lists: one for the chat display, one for its state."""
    cleared_display = []
    cleared_state = []
    return cleared_display, cleared_state
87
+
88
+ # Eventi
89
+ upload_button.click(
90
+ chat_upload_and_respond,
91
+ inputs=[file_input, chat_state, db_name_chat],
92
+ outputs=[chatbot]
93
+ )
94
+
95
+ ask_button.click(
96
+ respond,
97
+ inputs=[question_input, chat_state, db_name_chat],
98
+ outputs=[question_input, chatbot]
99
+ )
100
+
101
+ clear_button.click(
102
+ clear_chat,
103
+ outputs=[chatbot, chat_state]
104
+ )
105
+
106
+ with gr.Tab("Gestione Database"):
107
+ gr.Markdown("## Operazioni sui Database")
108
+
109
+ with gr.Row():
110
+ with gr.Column():
111
+ gr.Markdown("### Crea Database")
112
+ db_name_input = gr.Textbox(label="Nome Nuovo Database")
113
+ create_db_button = gr.Button("Crea Database")
114
+ create_output = gr.Textbox(label="Stato Creazione")
115
+
116
+ with gr.Column():
117
+ gr.Markdown("### Rinomina Database")
118
+ modify_db_old_name = gr.Dropdown(choices=databases, label="Database da Rinominare")
119
+ modify_db_new_name = gr.Textbox(label="Nuovo Nome")
120
+ modify_db_button = gr.Button("Rinomina Database")
121
+ modify_output = gr.Textbox(label="Stato Modifica")
122
+
123
+ with gr.Column():
124
+ gr.Markdown("### Elimina Database")
125
+ delete_db_dropdown = gr.Dropdown(choices=databases, label="Database da Eliminare")
126
+ delete_db_button = gr.Button("Elimina Database")
127
+ delete_output = gr.Textbox(label="Stato Eliminazione")
128
+
129
+ # Eventi per i pulsanti di gestione database
130
+ create_db_button.click(
131
+ create_database,
132
+ inputs=db_name_input,
133
+ outputs=create_output
134
+ ).then(
135
+ update_dropdowns,
136
+ outputs=[db_name_upload, db_name_list, db_name_chat, db_name_new, modify_db_old_name, delete_db_dropdown]
137
+ )
138
+
139
+ modify_db_button.click(
140
+ modify_database,
141
+ inputs=[modify_db_old_name, modify_db_new_name],
142
+ outputs=modify_output
143
+ ).then(
144
+ update_dropdowns,
145
+ outputs=[db_name_upload, db_name_list, db_name_chat, db_name_new, modify_db_old_name, delete_db_dropdown]
146
+ )
147
+
148
+ delete_db_button.click(
149
+ delete_database,
150
+ inputs=delete_db_dropdown,
151
+ outputs=delete_output
152
+ ).then(
153
+ update_dropdowns,
154
+ outputs=[db_name_upload, db_name_list, db_name_chat, db_name_new, modify_db_old_name, delete_db_dropdown]
155
+ )
156
+
157
+ with gr.Tab("Gestione Documenti"):
158
+ with gr.Column():
159
+ gr.Markdown("### Carica Documenti")
160
+ with gr.Row():
161
+ file_input = gr.File(
162
+ label="Carica i tuoi documenti",
163
+ file_types=[".txt", ".pdf", ".docx"],
164
+ file_count="multiple"
165
+ )
166
+ db_name_upload = gr.Dropdown(
167
+ choices=databases,
168
+ label="Seleziona Database",
169
+ value="default_db"
170
+ )
171
+
172
+ with gr.Row():
173
+ title_input = gr.Textbox(label="Titolo del documento")
174
+ author_input = gr.Textbox(label="Autore")
175
+
176
+ upload_button = gr.Button("Indicizza Documenti")
177
+ upload_output = gr.Textbox(label="Stato Upload")
178
+
179
+ with gr.Column():
180
+ gr.Markdown("### Documenti nel Database")
181
+ db_name_list = gr.Dropdown(
182
+ choices=databases,
183
+ label="Seleziona Database",
184
+ value="default_db"
185
+ )
186
+ list_button = gr.Button("Visualizza Files")
187
+ list_output = gr.Textbox(label="Files nel Database")
188
+ delete_file_input = gr.Textbox(label="Nome file da eliminare")
189
+ delete_file_button = gr.Button("Elimina File")
190
+ delete_file_output = gr.Textbox(label="Stato Eliminazione")
191
+
192
+ # Eventi modificati
193
+ upload_button.click(
194
+ upload_and_index,
195
+ inputs=[file_input, title_input, author_input, db_name_upload],
196
+ outputs=upload_output
197
+ ).then(
198
+ list_indexed_files,
199
+ inputs=db_name_list,
200
+ outputs=list_output
201
+ )
202
+
203
+ list_button.click(
204
+ list_indexed_files,
205
+ inputs=db_name_list,
206
+ outputs=list_output
207
+ )
208
+
209
+ delete_file_button.click(
210
+ delete_file_from_database,
211
+ inputs=[delete_file_input, db_name_list],
212
+ outputs=delete_file_output
213
+ ).then(
214
+ list_indexed_files,
215
+ inputs=db_name_list,
216
+ outputs=list_output
217
+ ).then(
218
+ update_dropdowns,
219
+ outputs=[db_name_upload, db_name_list, db_name_chat, db_name_new, modify_db_old_name, delete_db_dropdown]
220
+ )
221
+
222
+ with gr.Tab("Visualizza Documenti Indicizzati"):
223
+ with gr.Column():
224
+ gr.Markdown("### Documenti nel Database")
225
+ db_name_list = gr.Dropdown(
226
+ choices=databases,
227
+ label="Seleziona Database",
228
+ value="default_db",
229
+ interactive=True
230
+ )
231
+ list_button = gr.Button("Visualizza Documenti")
232
+ list_output = gr.Textbox(
233
+ label="Elenco Documenti",
234
+ lines=10,
235
+ interactive=False,
236
+ value="Clicca 'Visualizza Documenti' per vedere l'elenco"
237
+ )
238
+
239
+ # Evento click con aggiornamento
240
+ list_button.click(
241
+ fn=list_indexed_documents,
242
+ inputs=[db_name_list],
243
+ outputs=[list_output],
244
+ api_name="list_docs"
245
+ )
246
+
247
+
248
+
249
+ # Adding a new tab for new functionalities
250
+ with gr.Tab("Nuove Funzionalità"):
251
+ gr.Markdown("## Cerca Documenti e Genera Riassunto")
252
+
253
+ db_name_new = gr.Dropdown(choices=databases, label="Seleziona Database", value="default_db")
254
+ search_input = gr.Textbox(label="Inserisci Termine di Ricerca")
255
+ search_button = gr.Button("Cerca Documenti")
256
+ search_output = gr.Textbox(label="Documenti Trovati")
257
+
258
+ summary_button = gr.Button("Genera Riassunto")
259
+ summary_output = gr.Textbox(label="Riassunto")
260
+
261
+ search_button.click(
262
+ search_documents,
263
+ inputs=[search_input, db_name_new],
264
+ outputs=search_output
265
+ )
266
+
267
+ # summary_button.click(
268
+ # generate_summary,
269
+ # inputs=db_name_new,
270
+ # outputs=summary_output
271
+ # )
272
+
273
+ # Avvio dell'app
274
if __name__ == "__main__":
    # Launch the Gradio interface only when this file is run as a script.
    rag_chatbot.launch()
faiss_index_Daniele2/index.faiss DELETED
Binary file (309 kB)
 
faiss_index_E-learning/index.faiss ADDED
Binary file (66.1 kB). View file
 
{faiss_index_Daniele2 → faiss_index_E-learning}/index.pkl RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0106133e5dce16eac6520cb2154cf87e768a4e6ecc950a38b3df93088ed511bf
3
- size 107706
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0ec4d3c22f17861b941c079acdf82d250fdafd351e9b05ab3877110a3bbdade
3
+ size 25352
faiss_index_E-learning/metadata.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "filename": "istruzioni obiettivi di apprendimento.pdf",
4
+ "title": "Obiettivi",
5
+ "author": "Daniele",
6
+ "upload_date": "2024-12-31 19:21:10",
7
+ "chunks": 6
8
+ },
9
+ {
10
+ "filename": "mastery_Bloom.pdf",
11
+ "title": "Mastery Learingi",
12
+ "author": "Bloom",
13
+ "upload_date": "2024-12-31 20:25:00",
14
+ "chunks": 43
15
+ }
16
+ ]
requirements.txt CHANGED
@@ -4,98 +4,125 @@ aiohttp==3.11.11
4
  aiosignal==1.3.2
5
  annotated-types==0.7.0
6
  anyio==4.7.0
 
7
  async-timeout==4.0.3
8
  attrs==24.3.0
 
 
 
 
9
  certifi==2024.12.14
10
  charset-normalizer==3.4.1
 
 
11
  click==8.1.8
12
- dataclasses-json==0.6.7
 
13
  distro==1.9.0
 
14
  exceptiongroup==1.2.2
15
- faiss-cpu==1.9.0.post1
16
  fastapi==0.115.6
17
  ffmpy==0.5.0
18
  filelock==3.16.1
 
19
  frozenlist==1.5.0
20
  fsspec==2024.12.0
 
 
21
  gradio==5.9.1
22
  gradio_client==1.5.2
 
23
  h11==0.14.0
24
  httpcore==1.0.7
 
25
  httpx==0.28.1
26
- httpx-sse==0.4.0
27
  huggingface-hub==0.27.0
 
28
  idna==3.10
 
 
29
  Jinja2==3.1.5
30
  jiter==0.8.2
31
- joblib==1.4.2
32
  jsonpatch==1.33
33
  jsonpointer==3.0.0
 
34
  langchain==0.3.13
35
- langchain-community==0.3.13
36
  langchain-core==0.3.28
37
- langchain-huggingface==0.1.2
38
- langchain-openai==0.2.14
39
  langchain-text-splitters==0.3.4
40
  langsmith==0.2.7
41
  lxml==5.3.0
42
  markdown-it-py==3.0.0
43
  MarkupSafe==2.1.5
44
- marshmallow==3.23.2
45
  mdurl==0.1.2
 
 
46
  mpmath==1.3.0
47
  multidict==6.1.0
48
- mypy-extensions==1.0.0
49
- networkx==3.4.2
50
  numpy==1.26.4
 
 
51
  openai==1.58.1
 
 
 
 
 
 
 
 
 
 
52
  orjson==3.10.13
 
53
  packaging==24.2
54
  pandas==2.2.3
55
  pillow==11.0.0
 
56
  propcache==0.2.1
 
 
 
57
  pydantic==2.10.4
58
- pydantic-settings==2.7.0
59
  pydantic_core==2.27.2
60
  pydub==0.25.1
61
  Pygments==2.18.0
62
- PyPDF2==3.0.1
 
 
63
  python-dateutil==2.9.0.post0
64
  python-docx==1.1.2
65
  python-dotenv==1.0.1
66
  python-multipart==0.0.20
67
  pytz==2024.2
68
  PyYAML==6.0.2
69
- regex==2024.11.6
70
  requests==2.32.3
 
71
  requests-toolbelt==1.0.0
72
  rich==13.9.4
 
73
  ruff==0.8.4
74
  safehttpx==0.1.6
75
- safetensors==0.4.5
76
- scikit-learn==1.6.0
77
- scipy==1.14.1
78
  semantic-version==2.10.0
79
- sentence-transformers==3.3.1
80
  shellingham==1.5.4
81
  six==1.17.0
82
  sniffio==1.3.1
83
  SQLAlchemy==2.0.36
84
  starlette==0.41.3
85
- sympy==1.13.1
86
  tenacity==9.0.0
87
- threadpoolctl==3.5.0
88
- tiktoken==0.8.0
89
  tokenizers==0.21.0
 
90
  tomlkit==0.13.2
91
- torch==2.5.1
92
  tqdm==4.67.1
93
- transformers==4.47.1
94
  typer==0.15.1
95
- typing-inspect==0.9.0
96
  typing_extensions==4.12.2
97
  tzdata==2024.2
98
  urllib3==2.3.0
99
  uvicorn==0.34.0
 
 
 
100
  websockets==14.1
 
101
  yarl==1.18.3
 
 
4
  aiosignal==1.3.2
5
  annotated-types==0.7.0
6
  anyio==4.7.0
7
+ asgiref==3.8.1
8
  async-timeout==4.0.3
9
  attrs==24.3.0
10
+ backoff==2.2.1
11
+ bcrypt==4.2.1
12
+ build==1.2.2.post1
13
+ cachetools==5.5.0
14
  certifi==2024.12.14
15
  charset-normalizer==3.4.1
16
+ chroma-hnswlib==0.7.6
17
+ chromadb==0.6.0
18
  click==8.1.8
19
+ coloredlogs==15.0.1
20
+ Deprecated==1.2.15
21
  distro==1.9.0
22
+ durationpy==0.9
23
  exceptiongroup==1.2.2
 
24
  fastapi==0.115.6
25
  ffmpy==0.5.0
26
  filelock==3.16.1
27
+ flatbuffers==24.12.23
28
  frozenlist==1.5.0
29
  fsspec==2024.12.0
30
+ google-auth==2.37.0
31
+ googleapis-common-protos==1.66.0
32
  gradio==5.9.1
33
  gradio_client==1.5.2
34
+ grpcio==1.68.1
35
  h11==0.14.0
36
  httpcore==1.0.7
37
+ httptools==0.6.4
38
  httpx==0.28.1
 
39
  huggingface-hub==0.27.0
40
+ humanfriendly==10.0
41
  idna==3.10
42
+ importlib_metadata==8.5.0
43
+ importlib_resources==6.4.5
44
  Jinja2==3.1.5
45
  jiter==0.8.2
 
46
  jsonpatch==1.33
47
  jsonpointer==3.0.0
48
+ kubernetes==31.0.0
49
  langchain==0.3.13
 
50
  langchain-core==0.3.28
 
 
51
  langchain-text-splitters==0.3.4
52
  langsmith==0.2.7
53
  lxml==5.3.0
54
  markdown-it-py==3.0.0
55
  MarkupSafe==2.1.5
 
56
  mdurl==0.1.2
57
+ mmh3==5.0.1
58
+ monotonic==1.6
59
  mpmath==1.3.0
60
  multidict==6.1.0
 
 
61
  numpy==1.26.4
62
+ oauthlib==3.2.2
63
+ onnxruntime==1.20.1
64
  openai==1.58.1
65
+ opentelemetry-api==1.29.0
66
+ opentelemetry-exporter-otlp-proto-common==1.29.0
67
+ opentelemetry-exporter-otlp-proto-grpc==1.29.0
68
+ opentelemetry-instrumentation==0.50b0
69
+ opentelemetry-instrumentation-asgi==0.50b0
70
+ opentelemetry-instrumentation-fastapi==0.50b0
71
+ opentelemetry-proto==1.29.0
72
+ opentelemetry-sdk==1.29.0
73
+ opentelemetry-semantic-conventions==0.50b0
74
+ opentelemetry-util-http==0.50b0
75
  orjson==3.10.13
76
+ overrides==7.7.0
77
  packaging==24.2
78
  pandas==2.2.3
79
  pillow==11.0.0
80
+ posthog==3.7.4
81
  propcache==0.2.1
82
+ protobuf==5.29.2
83
+ pyasn1==0.6.1
84
+ pyasn1_modules==0.4.1
85
  pydantic==2.10.4
 
86
  pydantic_core==2.27.2
87
  pydub==0.25.1
88
  Pygments==2.18.0
89
+ pypdf==5.1.0
90
+ PyPika==0.48.9
91
+ pyproject_hooks==1.2.0
92
  python-dateutil==2.9.0.post0
93
  python-docx==1.1.2
94
  python-dotenv==1.0.1
95
  python-multipart==0.0.20
96
  pytz==2024.2
97
  PyYAML==6.0.2
 
98
  requests==2.32.3
99
+ requests-oauthlib==2.0.0
100
  requests-toolbelt==1.0.0
101
  rich==13.9.4
102
+ rsa==4.9
103
  ruff==0.8.4
104
  safehttpx==0.1.6
 
 
 
105
  semantic-version==2.10.0
 
106
  shellingham==1.5.4
107
  six==1.17.0
108
  sniffio==1.3.1
109
  SQLAlchemy==2.0.36
110
  starlette==0.41.3
111
+ sympy==1.13.3
112
  tenacity==9.0.0
 
 
113
  tokenizers==0.21.0
114
+ tomli==2.2.1
115
  tomlkit==0.13.2
 
116
  tqdm==4.67.1
 
117
  typer==0.15.1
 
118
  typing_extensions==4.12.2
119
  tzdata==2024.2
120
  urllib3==2.3.0
121
  uvicorn==0.34.0
122
+ uvloop==0.21.0
123
+ watchfiles==1.0.3
124
+ websocket-client==1.8.0
125
  websockets==14.1
126
+ wrapt==1.17.0
127
  yarl==1.18.3
128
+ zipp==3.21.0