Nugh75 committed on
Commit
47e4aa2
1 Parent(s): 59310ba

update: restructure app.py by splitting it into separate files

.gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,51 @@
+ # macOS system files
+ .DS_Store
+ .AppleDouble
+ .LSOverride
+
+ # macOS system folders
+ **/.DS_Store
+ **/._*
+ **/.Spotlight-V100
+ **/.Trashes
+ **/.fseventsd
+
+ # Environment files and local configuration
+ .env
+ .venv
+ venv/
+ env/
+
+ # Cache and log files
+ *.log
+ *.pyc
+ __pycache__/
+ .pytest_cache/
+ *.egg-info/
+ dist/
+ build/
+
+ # Editor/IDE files
+ .vscode/
+ .idea/
+ *.swp
+ *.swo
+ *.sublime-workspace
+ *.sublime-project
+
+ # Jupyter Notebook files
+ .ipynb_checkpoints/
+
+ # Virtualenv files
+ bin/
+ include/
+ lib/
+ lib64/
+ pip-selfcheck.json
+ pyvenv.cfg
+
+ # Python files
+ *.py[cod]
+ *$py.class
+ *.so
+ .Python
.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
+ -----BEGIN CERTIFICATE-----
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
+ -----END CERTIFICATE-----
README.md ADDED
@@ -0,0 +1,12 @@
+ ---
+ title: Edurag Beta
+ emoji: 🔥
+ colorFrom: pink
+ colorTo: blue
+ sdk: gradio
+ sdk_version: 5.9.1
+ app_file: app.py
+ pinned: false
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
Structure.txt ADDED
@@ -0,0 +1,8 @@
+ .
+ ├── app.py
+ └── app
+     ├── __init__.py
+     ├── config.py
+     ├── document_handling.py
+     ├── llm_handling.py
+     └── logging_config.py
app.py ADDED
@@ -0,0 +1,342 @@
+ import gradio as gr
+ import logging
+
+ # We assume the project provides these functions:
+ # - list_databases(), create_database(), modify_database(), delete_database()...
+ # - list_indexed_files(), upload_and_index(), delete_file_from_database(), etc.
+ # - search_documents(), list_indexed_documents()...
+ #
+ # If they have different names or locations, adjust the imports accordingly
+ from app.document_handling import (
+     list_databases,
+     create_database,
+     modify_database,
+     delete_database,
+     upload_and_index,
+     list_indexed_files,
+     delete_file_from_database,
+     list_indexed_documents,
+     search_documents, extract_text_from_pdf, extract_text_from_docx,
+ )
+ from app.llm_handling import answer_question
+ from app.logging_config import configure_logging
+
+ configure_logging()
+
+ def update_dropdowns():
+     """Refresh every dropdown with the up-to-date list of databases."""
+     databases = list_databases()
+     # Return 6 update() objects because the UI has 6 dropdowns to keep in sync
+     return [gr.update(choices=databases) for _ in range(6)]
+
+ def extract_text_from_files(files):
+     """Extract and concatenate text from PDF, DOCX and TXT files."""
+     text = ""
+     for file in files:
+         try:
+             if file.name.endswith('.pdf'):
+                 text += extract_text_from_pdf(file.name)  # defined in document_handling
+             elif file.name.endswith('.docx'):
+                 text += extract_text_from_docx(file.name)  # defined in document_handling
+             else:
+                 with open(file.name, 'r', encoding='utf-8') as f:
+                     text += f.read()
+         except Exception as e:
+             logging.error(f"Errore durante la lettura del file {file.name}: {e}")
+     return text
+
+
+ with gr.Blocks() as rag_chatbot:
+     gr.Markdown("# Chatbot basato su RAG")
+
+     databases = list_databases()
+
+     # These State() objects back the dropdown handling.
+     # If you do not need them as separate states, they can be removed.
+     db_name_upload = gr.State()
+     db_name_list = gr.State()
+     db_name_chat = gr.State()
+     db_name_new = gr.State()
+     modify_db_old_name = gr.State()
+     delete_db_dropdown = gr.State()
+
+     # =============================================
+     # TAB: Chatbot
+     # =============================================
+     with gr.Tab("Chatbot"):
+         with gr.Row():
+             with gr.Column(scale=2):
+                 # Dropdown for selecting the DB
+                 db_name_chat = gr.Dropdown(
+                     choices=databases,
+                     label="Seleziona Database",
+                     value="default_db"
+                 )
+
+                 # Chatbot component
+                 chatbot = gr.Chatbot(label="Conversazione", type="messages")
+
+                 # Question input
+                 question_input = gr.Textbox(
+                     label="Fai una domanda",
+                     placeholder="Scrivi qui la tua domanda...",
+                     lines=2
+                 )
+                 # Action buttons
+                 with gr.Row():
+                     ask_button = gr.Button("Invia")
+                     clear_button = gr.Button("Pulisci Chat")
+
+                 # File upload with a reduced footprint
+                 with gr.Row():
+                     file_input = gr.File(
+                         label="Carica PDF/Docx/TXT per la conversazione",
+                         file_types=[".pdf", ".docx", ".txt"],
+                         file_count="multiple",
+                         height="100px",  # reduced height
+                         scale=3  # reduces the relative width
+                     )
+                     upload_button = gr.Button("Carica Documenti", scale=1)
+
+
+
+         # Chat state
+         chat_state = gr.State([])
+
+         # ----------------------
+         # CALLBACK FUNCTIONS
+         # ----------------------
+         def chat_upload_and_respond(files, chat_history, db_name):
+             # Initialise chat_history if it is None
+             if chat_history is None:
+                 chat_history = []
+
+             # Extract the text from the files
+             text = extract_text_from_files(files)
+
+             # Append an "assistant" message showing the uploaded text
+             chat_history.append({
+                 "role": "assistant",
+                 "content": f"📄 Contenuto dei documenti caricati:\n{text}"
+             })
+
+             return chat_history
+
+         def respond(message, chat_history, db_name):
+             if chat_history is None:
+                 chat_history = []
+
+             # `answer_question` returns two messages (user + assistant) as a list
+             new_messages = answer_question(message, db_name)
+
+             # Append them to the end of the history
+             chat_history.extend(new_messages)
+
+             # Return an emptied input (to clear the Textbox) and the new history
+             return "", chat_history
+
+         def clear_chat():
+             # Empty the chat
+             return [], []
+
+         # ------------------
+         # BUTTON EVENTS
+         # ------------------
+         upload_button.click(
+             fn=chat_upload_and_respond,
+             inputs=[file_input, chat_state, db_name_chat],
+             outputs=chatbot
+         )
+
+         ask_button.click(
+             fn=respond,
+             inputs=[question_input, chat_state, db_name_chat],
+             outputs=[question_input, chatbot]
+         )
+
+         clear_button.click(
+             fn=clear_chat,
+             outputs=[chatbot, chat_state]
+         )
+
+
+     # =============================================
+     # TAB: Gestione Database
+     # =============================================
+     with gr.Tab("Gestione Database"):
+         gr.Markdown("## Operazioni sui Database")
+
+         with gr.Row():
+             with gr.Column():
+                 gr.Markdown("### Crea Database")
+                 db_name_input = gr.Textbox(label="Nome Nuovo Database")
+                 create_db_button = gr.Button("Crea Database")
+                 create_output = gr.Textbox(label="Stato Creazione")
+
+             with gr.Column():
+                 gr.Markdown("### Rinomina Database")
+                 modify_db_old_name = gr.Dropdown(choices=databases, label="Database da Rinominare")
+                 modify_db_new_name = gr.Textbox(label="Nuovo Nome")
+                 modify_db_button = gr.Button("Rinomina Database")
+                 modify_output = gr.Textbox(label="Stato Modifica")
+
+             with gr.Column():
+                 gr.Markdown("### Elimina Database")
+                 delete_db_dropdown = gr.Dropdown(choices=databases, label="Database da Eliminare")
+                 delete_db_button = gr.Button("Elimina Database")
+                 delete_output = gr.Textbox(label="Stato Eliminazione")
+
+         # Events for the DB management buttons
+         create_db_button.click(
+             create_database,       # function
+             inputs=db_name_input,  # input
+             outputs=create_output  # output
+         ).then(
+             update_dropdowns,
+             outputs=[db_name_upload, db_name_list, db_name_chat, db_name_new, modify_db_old_name, delete_db_dropdown]
+         )
+
+         modify_db_button.click(
+             modify_database,
+             inputs=[modify_db_old_name, modify_db_new_name],
+             outputs=modify_output
+         ).then(
+             update_dropdowns,
+             outputs=[db_name_upload, db_name_list, db_name_chat, db_name_new, modify_db_old_name, delete_db_dropdown]
+         )
+
+         delete_db_button.click(
+             delete_database,
+             inputs=delete_db_dropdown,
+             outputs=delete_output
+         ).then(
+             update_dropdowns,
+             outputs=[db_name_upload, db_name_list, db_name_chat, db_name_new, modify_db_old_name, delete_db_dropdown]
+         )
+
+
+     # =============================================
+     # TAB: Gestione Documenti
+     # =============================================
+     with gr.Tab("Gestione Documenti"):
+         with gr.Column():
+             gr.Markdown("### Carica Documenti")
+             with gr.Row():
+                 file_input = gr.File(
+                     label="Carica i tuoi documenti",
+                     file_types=[".txt", ".pdf", ".docx"],
+                     file_count="multiple"
+                 )
+                 db_name_upload = gr.Dropdown(
+                     choices=databases,
+                     label="Seleziona Database",
+                     value="default_db"
+                 )
+
+             with gr.Row():
+                 title_input = gr.Textbox(label="Titolo del documento")
+                 author_input = gr.Textbox(label="Autore")
+
+             upload_button = gr.Button("Indicizza Documenti")
+             upload_output = gr.Textbox(label="Stato Upload")
+
+         with gr.Column():
+             gr.Markdown("### Documenti nel Database")
+             db_name_list = gr.Dropdown(
+                 choices=databases,
+                 label="Seleziona Database",
+                 value="default_db"
+             )
+             list_button = gr.Button("Visualizza Files")
+             list_output = gr.Textbox(label="Files nel Database")
+             delete_file_input = gr.Textbox(label="Nome file da eliminare")
+             delete_file_button = gr.Button("Elimina File")
+             delete_file_output = gr.Textbox(label="Stato Eliminazione")
+
+         # Events
+         upload_button.click(
+             upload_and_index,
+             inputs=[file_input, title_input, author_input, db_name_upload],
+             outputs=upload_output
+         ).then(
+             list_indexed_files,
+             inputs=db_name_list,
+             outputs=list_output
+         )
+
+         list_button.click(
+             list_indexed_files,
+             inputs=db_name_list,
+             outputs=list_output
+         )
+
+         delete_file_button.click(
+             delete_file_from_database,
+             inputs=[delete_file_input, db_name_list],
+             outputs=delete_file_output
+         ).then(
+             list_indexed_files,
+             inputs=db_name_list,
+             outputs=list_output
+         ).then(
+             update_dropdowns,
+             outputs=[db_name_upload, db_name_list, db_name_chat, db_name_new, modify_db_old_name, delete_db_dropdown]
+         )
+
+
+     # =============================================
+     # TAB: Visualizza Documenti Indicizzati
+     # =============================================
+     with gr.Tab("Visualizza Documenti Indicizzati"):
+         with gr.Column():
+             gr.Markdown("### Documenti nel Database")
+             db_name_list = gr.Dropdown(
+                 choices=databases,
+                 label="Seleziona Database",
+                 value="default_db",
+                 interactive=True
+             )
+             list_button = gr.Button("Visualizza Documenti")
+             list_output = gr.Textbox(
+                 label="Elenco Documenti",
+                 lines=10,
+                 interactive=False,
+                 value="Clicca 'Visualizza Documenti' per vedere l'elenco"
+             )
+
+         list_button.click(
+             fn=list_indexed_documents,
+             inputs=[db_name_list],
+             outputs=[list_output],
+             api_name="list_docs"
+         )
+
+     # =============================================
+     # TAB: Nuove Funzionalità
+     # =============================================
+     with gr.Tab("Nuove Funzionalità"):
+         gr.Markdown("## Cerca Documenti e Genera Riassunto")
+
+         db_name_new = gr.Dropdown(choices=databases, label="Seleziona Database", value="default_db")
+         search_input = gr.Textbox(label="Inserisci Termine di Ricerca")
+         search_button = gr.Button("Cerca Documenti")
+         search_output = gr.Textbox(label="Documenti Trovati")
+
+         summary_button = gr.Button("Genera Riassunto")
+         summary_output = gr.Textbox(label="Riassunto")
+
+         search_button.click(
+             search_documents,
+             inputs=[search_input, db_name_new],
+             outputs=search_output
+         )
+         # Example of an optional summary-generation hook
+         # summary_button.click(
+         #     generate_summary,
+         #     inputs=db_name_new,
+         #     outputs=summary_output
+         # )
+
+ # App start-up
+ if __name__ == "__main__":
+     rag_chatbot.launch()
app/__init__.py ADDED
File without changes
app/app.py ADDED
File without changes
app/config.py ADDED
@@ -0,0 +1,10 @@
+ import os
+ from dotenv import load_dotenv
+
+ # Load environment variables from the .env file
+ load_dotenv()
+
+ # Model configuration
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+ if not OPENAI_API_KEY:
+     raise ValueError("OPENAI_API_KEY non trovata. Verifica il file .env")
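app/config.py only hints at the expected environment in its comments, so here is a minimal usage sketch (not part of the commit). It assumes a `.env` file at the project root containing an `OPENAI_API_KEY=` entry (the value below is a placeholder) and that the package is importable as `app`:

# .env (placeholder value)
# OPENAI_API_KEY=sk-xxxxxxxxxxxxxxxx

from openai import OpenAI
from app.config import OPENAI_API_KEY  # raises ValueError at import time if the key is missing

client = OpenAI(api_key=OPENAI_API_KEY)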
app/document_handling.py ADDED
@@ -0,0 +1,320 @@
+ import logging
+ import gradio as gr  # kept although not used directly in this module
+ from langchain_community.vectorstores import FAISS
+ from langchain_huggingface import HuggingFaceEmbeddings
+ import os
+ import shutil
+ import PyPDF2
+ from docx import Document
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from dataclasses import dataclass
+ import json
+ from datetime import datetime
+
+ # Initialize the text splitter
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
+
+ # -------------- UTILITY FUNCTIONS --------------
+ @dataclass
+ class DocumentMetadata:
+     filename: str
+     title: str
+     author: str
+     upload_date: str
+     chunks: int
+
+     def to_dict(self):
+         return {
+             "filename": self.filename,
+             "title": self.title,
+             "author": self.author,
+             "upload_date": self.upload_date,
+             "chunks": self.chunks
+         }
+
+ def save_metadata(metadata_list, db_name):
+     db_path = f"faiss_index_{db_name}"
+     metadata_file = os.path.join(db_path, "metadata.json")
+
+     existing_metadata = []
+     if os.path.exists(metadata_file):
+         with open(metadata_file, 'r') as f:
+             existing_metadata = json.load(f)
+
+     existing_metadata.extend([m.to_dict() for m in metadata_list])
+
+     with open(metadata_file, 'w') as f:
+         json.dump(existing_metadata, f, indent=2)
+
+ def extract_text_from_pdf(file_path):
+     with open(file_path, 'rb') as f:
+         reader = PyPDF2.PdfReader(f)
+         text = ""
+         for page in reader.pages:
+             text += page.extract_text()
+     return text
+
+ def extract_text_from_docx(file_path):
+     doc = Document(file_path)
+     text = ""
+     for para in doc.paragraphs:
+         text += para.text + "\n"
+     return text
+
+ # -------------- CHATBOT TAB FUNCTIONS --------------
+ def answer_question(question, db_name="default_db"):
+     db_path = f"faiss_index_{db_name}"
+     if not os.path.exists(db_path):
+         logging.warning(f"L'indice FAISS per il database {db_name} non esiste.")
+         return "Database non trovato."
+
+     embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+     vectorstore = FAISS.load_local(db_path, embeddings, allow_dangerous_deserialization=True)
+
+     # Perform a similarity search
+     docs = vectorstore.similarity_search(question)
+
+     if not docs:
+         return "Nessun documento corrispondente alla query."
+
+     # Collect the document contents
+     results = [doc.page_content for doc in docs]
+     return "\n\n".join(results)
+
+ # -------------- DATABASE MANAGEMENT TAB FUNCTIONS --------------
+ def create_database(db_name):
+     logging.info(f"Creating database: {db_name}")
+     db_path = f"faiss_index_{db_name}"
+
+     if os.path.exists(db_path):
+         return f"Il database {db_name} esiste già."
+
+     try:
+         os.makedirs(db_path)
+         logging.info(f"Database {db_name} created successfully.")
+         # Return a single status string: the click handler wires one Textbox output
+         return f"Database {db_name} creato con successo."
+     except Exception as e:
+         logging.error(f"Errore nella creazione del database: {e}")
+         return f"Errore nella creazione del database: {e}"
+
+ def delete_database(db_name):
+     db_path = f"faiss_index_{db_name}"
+     if not os.path.exists(db_path):
+         return f"Il database {db_name} non esiste."
+     try:
+         shutil.rmtree(db_path)
+         logging.info(f"Database {db_name} eliminato con successo.")
+         return f"Database {db_name} eliminato con successo."
+     except OSError as e:
+         logging.error(f"Impossibile eliminare il database {db_name}: {e}")
+         return f"Impossibile eliminare il database {db_name}: {e}"
+
+ def modify_database(old_db_name, new_db_name):
+     old_db_path = f"faiss_index_{old_db_name}"
+     new_db_path = f"faiss_index_{new_db_name}"
+     if not os.path.exists(old_db_path):
+         return f"Il database {old_db_name} non esiste."
+     if os.path.exists(new_db_path):
+         return f"Il database {new_db_name} esiste già."
+     try:
+         os.rename(old_db_path, new_db_path)
+         return f"Database {old_db_name} rinominato in {new_db_name} con successo."
+     except Exception as e:
+         return f"Errore durante la modifica del database: {e}"
+
+ def list_databases():
+     try:
+         databases = []
+         for item in os.listdir():
+             if os.path.isdir(item) and item.startswith("faiss_index_"):
+                 db_name = item.replace("faiss_index_", "")
+                 databases.append(db_name)
+         # Ensure "default_db" is in the list
+         if "default_db" not in databases:
+             databases.append("default_db")
+         return databases
+     except Exception as e:
+         logging.error(f"Error listing databases: {e}")
+         return []
+
+ # -------------- DOCUMENT MANAGEMENT TAB FUNCTIONS --------------
+ def upload_and_index(files, title, author, db_name="default_db"):
+     if not files:
+         return "Nessun file caricato."
+
+     documents = []
+     doc_metadata = []
+
+     for file in files:
+         try:
+             if file.name.endswith('.pdf'):
+                 text = extract_text_from_pdf(file.name)
+             elif file.name.endswith('.docx'):
+                 text = extract_text_from_docx(file.name)
+             else:
+                 with open(file.name, 'r', encoding='utf-8') as f:
+                     text = f.read()
+
+             chunks = text_splitter.split_text(text)
+
+             # Metadata for the document
+             doc_meta = DocumentMetadata(
+                 filename=os.path.basename(file.name),
+                 title=title,
+                 author=author,
+                 upload_date=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                 chunks=len(chunks)
+             )
+
+             # Metadata for every chunk
+             for i, chunk in enumerate(chunks):
+                 chunk_metadata = {
+                     "content": chunk,
+                     "source": os.path.basename(file.name),
+                     "title": title,
+                     "author": author,
+                     "chunk_index": i,
+                     "total_chunks": len(chunks),
+                     "upload_date": doc_meta.upload_date
+                 }
+                 documents.append(chunk_metadata)
+
+             doc_metadata.append(doc_meta)
+
+         except Exception as e:
+             logging.error(f"Errore durante la lettura del file {file.name}: {e}")
+             continue
+
+     if documents:
+         try:
+             db_path = f"faiss_index_{db_name}"
+             os.makedirs(db_path, exist_ok=True)
+
+             embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+             texts = [doc["content"] for doc in documents]
+             metadatas = [{k: v for k, v in doc.items() if k != "content"} for doc in documents]
+
+             vectorstore = FAISS.from_texts(texts, embeddings, metadatas=metadatas)
+             vectorstore.save_local(db_path)
+
+             # Save the document metadata
+             save_metadata(doc_metadata, db_name)
+
+             return f"Documenti indicizzati con successo nel database {db_name}!"
+         except Exception as e:
+             logging.error(f"Errore durante l'indicizzazione: {e}")
+             return f"Errore durante l'indicizzazione: {e}"
+
+     return "Nessun documento processato."
+
+ def list_indexed_files(db_name="default_db"):
+     db_path = f"faiss_index_{db_name}"
+     metadata_file = os.path.join(db_path, "metadata.json")
+
+     if not os.path.exists(metadata_file):
+         return "Nessun file nel database."
+
+     try:
+         with open(metadata_file, 'r') as f:
+             metadata = json.load(f)
+
+         output = []
+         for doc in metadata:
+             output.append(
+                 f"📄 {doc['title']}\n"
+                 f"   Autore: {doc['author']}\n"
+                 f"   File: {doc['filename']}\n"
+                 f"   Chunks: {doc['chunks']}\n"
+                 f"   Caricato il: {doc['upload_date']}\n"
+             )
+
+         return "\n".join(output) if output else "Nessun documento nel database."
+     except Exception as e:
+         logging.error(f"Errore nella lettura dei metadati: {e}")
+         return f"Errore nella lettura dei metadati: {e}"
+
+ def delete_file_from_database(file_name, db_name="default_db"):
+     db_path = f"faiss_index_{db_name}"
+     file_list_path = os.path.join(db_path, "file_list.txt")
+
+     if not os.path.exists(file_list_path):
+         return "Database non trovato."
+
+     try:
+         # Read the file list
+         with open(file_list_path, "r") as f:
+             files = f.readlines()
+
+         # Remove the file from the list
+         files = [f.strip() for f in files if f.strip() != file_name]
+
+         # Rewrite the updated list
+         with open(file_list_path, "w") as f:
+             for file in files:
+                 f.write(f"{file}\n")
+
+         return f"File {file_name} rimosso dal database {db_name}."
+     except Exception as e:
+         return f"Errore durante la rimozione del file: {e}"
+
+ # -------------- DOCUMENT VISUALIZATION TAB FUNCTIONS --------------
+ def list_indexed_documents(db_name="default_db"):
+     db_path = f"faiss_index_{db_name}"
+     metadata_file = os.path.join(db_path, "metadata.json")
+
+     if not os.path.exists(db_path):
+         return f"Il database {db_name} non esiste."
+
+     if not os.path.exists(metadata_file):
+         return f"Nessun documento nel database {db_name}."
+
+     try:
+         with open(metadata_file, 'r') as f:
+             metadata = json.load(f)
+
+         if not metadata:
+             return "Nessun documento trovato nel database."
+
+         output_lines = ["📚 Documenti nel database:"]
+         for doc in metadata:
+             output_lines.extend([
+                 f"\n📄 Documento: {doc['title']}",
+                 f"   📝 Autore: {doc['author']}",
+                 f"   📁 File: {doc['filename']}",
+                 f"   🕒 Caricato il: {doc['upload_date']}",
+                 f"   📑 Chunks: {doc['chunks']}"
+             ])
+
+         result = "\n".join(output_lines)
+         logging.info(f"Documenti trovati nel database {db_name}: {result}")
+         return result
+
+     except Exception as e:
+         error_msg = f"Errore nella lettura dei metadati: {e}"
+         logging.error(error_msg)
+         return error_msg
+
+ # -------------- NEW FEATURES TAB FUNCTIONS --------------
+ def search_documents(query, db_name="default_db"):
+     db_path = f"faiss_index_{db_name}"
+     if not os.path.exists(db_path):
+         logging.warning(f"L'indice FAISS per il database {db_name} non esiste.")
+         return "Database non trovato."
+
+     embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+     vectorstore = FAISS.load_local(db_path, embeddings, allow_dangerous_deserialization=True)
+
+     # Perform a similarity search
+     docs = vectorstore.similarity_search(query)
+
+     if not docs:
+         return "Nessun documento corrispondente alla query."
+
+     # Collect the document contents
+     results = [doc.page_content for doc in docs]
+     return "\n\n".join(results)
+
+ def generate_summary(db_name="default_db"):
+     # Placeholder for summarization logic
+     return "This is a summary of the documents in the database."
app/import pytest.py ADDED
@@ -0,0 +1,58 @@
+ import pytest
+ from unittest.mock import Mock, patch
+ import os
+ from .llm_handling import answer_question
+
+ # app/test_llm_handling.py
+
+ @pytest.fixture
+ def mock_embeddings():
+     with patch('langchain_community.embeddings.HuggingFaceEmbeddings') as mock:
+         yield mock
+
+ @pytest.fixture
+ def mock_vectorstore():
+     with patch('langchain_community.vectorstores.FAISS') as mock:
+         mock_instance = Mock()
+         mock_instance.as_retriever.return_value = Mock()
+         mock.load_local.return_value = mock_instance
+         yield mock
+
+ @pytest.fixture
+ def mock_chat_openai():
+     with patch('langchain_openai.ChatOpenAI') as mock:
+         yield mock
+
+ def test_database_not_found():
+     result = answer_question("test question", "nonexistent_db")
+     assert len(result) == 2
+     assert result[0]["role"] == "user"
+     assert result[0]["content"] == "test question"
+     assert result[1]["role"] == "assistant"
+     assert result[1]["content"] == "Database non trovato"
+
+ @patch('os.path.exists', return_value=True)
+ def test_successful_answer(mock_exists, mock_embeddings, mock_vectorstore, mock_chat_openai):
+     mock_qa_chain = Mock()
+     mock_qa_chain.return_value = {"result": "Test answer"}
+
+     with patch('langchain.chains.RetrievalQA.from_chain_type', return_value=mock_qa_chain):
+         result = answer_question("test question", "test_db")
+
+     assert len(result) == 2
+     assert result[0]["role"] == "user"
+     assert result[0]["content"] == "test question"
+     assert result[1]["role"] == "assistant"
+     assert result[1]["content"] == "Test answer"
+
+ @patch('os.path.exists', return_value=True)
+ def test_error_handling(mock_exists, mock_embeddings):
+     mock_embeddings.side_effect = Exception("Test error")
+
+     result = answer_question("test question", "test_db")
+
+     assert len(result) == 2
+     assert result[0]["role"] == "user"
+     assert result[0]["content"] == "test question"
+     assert result[1]["role"] == "assistant"
+     assert "Si è verificato un errore: Test error" in result[1]["content"]
app/llm_handling.py ADDED
@@ -0,0 +1,118 @@
+ import logging
+ import os
+ import shutil
+
+ from openai import OpenAI
+ from langchain_community.vectorstores import FAISS
+ from langchain_community.embeddings import HuggingFaceEmbeddings
+ import gradio as gr
+
+ from app.config import OPENAI_API_KEY
+ # If you have database-management helpers (list_databases, ensure_default_db, etc.),
+ # import them from the appropriate module:
+ # from app.document_handling import list_databases, ensure_default_db
+
+ logging.basicConfig(level=logging.INFO)
+
+ def answer_question(question, db_name, chat_history=None):
+     """
+     Answer 'question' using the documents in the database 'db_name'.
+     Returns a list of 2 messages in the format:
+     [
+         {"role": "user", "content": <question>},
+         {"role": "assistant", "content": <answer>}
+     ]
+
+     In this version, the 'chunks' retrieved during the similarity
+     search are also logged.
+     """
+     if chat_history is None:
+         chat_history = []
+
+     logging.info(f"Inizio elaborazione domanda: {question} per database: {db_name}")
+
+     try:
+         embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+         db_path = f"faiss_index_{db_name}"
+
+         if not os.path.exists(db_path):
+             logging.warning(f"Database {db_name} non trovato.")
+             return [
+                 {"role": "user", "content": question},
+                 {"role": "assistant", "content": "Database non trovato"}
+             ]
+
+         # Load the FAISS index
+         vectorstore = FAISS.load_local(db_path, embeddings, allow_dangerous_deserialization=True)
+
+         # Retrieve the most similar documents (chunks)
+         relevant_docs = vectorstore.similarity_search(question, k=3)
+
+         # Log the retrieved chunks
+         for idx, doc in enumerate(relevant_docs):
+             logging.info(f"--- Chunk {idx+1} ---")
+             logging.info(doc.page_content)
+             logging.info("---------------------")
+
+         # Build the context from the documents
+         context = "\n".join([doc.page_content for doc in relevant_docs])
+
+         client = OpenAI(api_key=OPENAI_API_KEY)
+
+         messages = [
+             {"role": "system", "content": f"Usa questo contesto per rispondere: {context}"},
+             {"role": "user", "content": question}
+         ]
+
+         # Call OpenAI
+         response = client.chat.completions.create(
+             model="gpt-3.5-turbo",
+             messages=messages,
+             temperature=0,
+             max_tokens=2048
+         )
+
+         answer = response.choices[0].message.content
+
+         return [
+             {"role": "user", "content": question},
+             {"role": "assistant", "content": answer}
+         ]
+
+     except Exception as e:
+         logging.error(f"Errore durante la generazione della risposta: {e}")
+         return [
+             {"role": "user", "content": question},
+             {"role": "assistant", "content": f"Si è verificato un errore: {str(e)}"}
+         ]
+
+
+ def delete_database(db_name):
+     """
+     Delete the FAISS database corresponding to 'db_name'.
+     Returns a status message and the Gradio dropdown update.
+     """
+     db_path = f"faiss_index_{db_name}"
+     if not os.path.exists(db_path):
+         return f"Il database {db_name} non esiste.", gr.update(choices=[])
+     try:
+         shutil.rmtree(db_path)
+         logging.info(f"Database {db_name} eliminato con successo.")
+         # If a list_databases() helper is available, use it to refresh the dropdown
+         return f"Database {db_name} eliminato con successo.", gr.update(choices=[])
+     except OSError as e:
+         logging.error(f"Impossibile eliminare il database {db_name}: {e}")
+         return f"Impossibile eliminare il database {db_name}: {e}", gr.update(choices=[])
+
+
+ if __name__ == "__main__":
+     # If an ensure_default_db() helper exists, uncomment:
+     # ensure_default_db()
+
+     # Here you could test answer_question or start
+     # your Gradio server, for example:
+     #
+     # from app.interface import rag_chatbot
+     # rag_chatbot.launch(share=True)
+
+     pass
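The answer_question docstring above describes the returned user/assistant message pair; the sketch below (not part of the commit) shows how that pair feeds a messages-type gr.Chatbot history, mirroring the respond() callback in app.py. It assumes an existing default_db index and a valid OPENAI_API_KEY; the question text is only an example:

from app.llm_handling import answer_question

history = []  # messages-format chat history, as expected by gr.Chatbot(type="messages")
history.extend(answer_question("Che cos'è il mastery learning?", "default_db"))
for msg in history:  # [{'role': 'user', ...}, {'role': 'assistant', ...}]
    print(f"{msg['role']}: {msg['content'][:80]}")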
app/llm_handling_2.py ADDED
@@ -0,0 +1,34 @@
+ import logging
+ from langchain_openai import ChatOpenAI
+ from app.config import OPENAI_API_KEY
+
+ def answer_question(question):
+     logging.info(f"Chiamata all'LLM con domanda: {question}")
+     sys = (
+         "Sei un assistente AI per la lingua Italiana di nome Counselorbot. "
+         "Rispondi nella lingua usata per la domanda in modo chiaro, semplice ed esaustivo."
+     )
+
+     messages = [
+         {"role": "system", "content": sys},
+         {"role": "user", "content": question}
+     ]
+     logging.info(f"Messages sent to LLM: {messages}")
+
+     try:
+         llm = ChatOpenAI(
+             model="gpt-4o-mini",
+             openai_api_key=OPENAI_API_KEY,
+             temperature=0.6,
+             max_tokens=512,
+             top_p=0.9
+         )
+         response = llm.invoke(input=messages)
+         logging.info(f"Contesto RAG inviato all'LLM: {messages}")
+         logging.info(f"Risposta ricevuta dall'LLM: {response}")
+         answer = response.content.strip()
+         logging.info(f"Domanda: {question} | Risposta: {answer}")
+         return answer
+     except Exception as e:
+         logging.error(f"Errore durante la generazione della risposta: {e}")
+         return f"Errore durante la generazione della risposta: {e}"
app/llm_handling_3.py ADDED
@@ -0,0 +1,76 @@
+ import logging
+ from openai import OpenAI
+ from langchain_community.vectorstores import FAISS
+ from langchain_community.embeddings import HuggingFaceEmbeddings
+ from app.config import OPENAI_API_KEY
+ import gradio as gr
+ import os, shutil
+ from app.document_handling import list_databases  # used below to refresh the dropdown
+
+ logging.basicConfig(level=logging.INFO)
+
+ def answer_question(question, db_name, chat_history=None):
+     if chat_history is None:
+         chat_history = []
+
+     logging.info(f"Inizio elaborazione domanda: {question} per database: {db_name}")
+
+     try:
+         embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+         db_path = f"faiss_index_{db_name}"
+
+         if not os.path.exists(db_path):
+             return [{"role": "user", "content": question},
+                     {"role": "assistant", "content": "Database non trovato"}]
+
+         vectorstore = FAISS.load_local(db_path, embeddings, allow_dangerous_deserialization=True)
+         relevant_docs = vectorstore.similarity_search(question, k=3)
+
+         # Build the context from the documents
+         context = "\n".join([doc.page_content for doc in relevant_docs])
+
+         client = OpenAI(api_key=OPENAI_API_KEY)
+
+         messages = [
+             {"role": "system", "content": f"Usa questo contesto per rispondere: {context}"},
+             {"role": "user", "content": question}
+         ]
+
+         response = client.chat.completions.create(
+             model="gpt-3.5-turbo",  # switched from gpt-4o-mini to a supported model
+             messages=messages,
+             temperature=0,
+             max_tokens=2048
+         )
+
+         answer = response.choices[0].message.content
+
+         return [
+             {"role": "user", "content": question},
+             {"role": "assistant", "content": answer}
+         ]
+
+     except Exception as e:
+         logging.error(f"Errore durante la generazione della risposta: {e}")
+         return [
+             {"role": "user", "content": question},
+             {"role": "assistant", "content": f"Si è verificato un errore: {str(e)}"}
+         ]
+
+ # In document_handling.py, update delete_database so it also returns the dropdown update
+ def delete_database(db_name):
+     db_path = f"faiss_index_{db_name}"
+     if not os.path.exists(db_path):
+         return f"Il database {db_name} non esiste.", gr.update(choices=list_databases())
+     try:
+         shutil.rmtree(db_path)
+         logging.info(f"Database {db_name} eliminato con successo.")
+         return f"Database {db_name} eliminato con successo.", gr.update(choices=list_databases())
+     except OSError as e:
+         logging.error(f"Impossibile eliminare il database {db_name}: {e}")
+         return f"Impossibile eliminare il database {db_name}: {e}", gr.update(choices=list_databases())
+
+ # ensure_default_db() and rag_chatbot are not defined in this module
+ if __name__ == "__main__":
+     # ensure_default_db()               # left commented out until such a helper exists
+     pass  # rag_chatbot.launch(share=True) belongs in app.py, not here
app/logging_config.py ADDED
@@ -0,0 +1,12 @@
+ import logging
+ from dotenv import load_dotenv
+
+ # Load environment variables from the .env file
+ load_dotenv()
+
+ def configure_logging():
+     logging.basicConfig(
+         filename="rag_chatbot.log",
+         level=logging.INFO,
+         format="%(asctime)s - %(levelname)s - %(message)s"
+     )
app/test_llm_handling.py ADDED
@@ -0,0 +1,55 @@
+ import pytest
+ from unittest.mock import Mock, patch
+ from .llm_handling import answer_question
+
+ @pytest.fixture
+ def mock_embeddings():
+     with patch('langchain_community.embeddings.HuggingFaceEmbeddings') as mock:
+         yield mock
+
+ @pytest.fixture
+ def mock_vectorstore():
+     with patch('langchain_community.vectorstores.FAISS') as mock:
+         mock_instance = Mock()
+         mock_instance.as_retriever.return_value = Mock()
+         mock.load_local.return_value = mock_instance
+         yield mock
+
+ @pytest.fixture
+ def mock_chat_openai():
+     with patch('langchain_openai.ChatOpenAI') as mock:
+         yield mock
+
+ def test_database_not_found():
+     result = answer_question("test question", "nonexistent_db")
+     assert len(result) == 2
+     assert result[0]["role"] == "user"
+     assert result[0]["content"] == "test question"
+     assert result[1]["role"] == "assistant"
+     assert result[1]["content"] == "Database non trovato"
+
+ @patch('os.path.exists', return_value=True)
+ def test_successful_answer(mock_exists, mock_embeddings, mock_vectorstore, mock_chat_openai):
+     mock_qa_chain = Mock()
+     mock_qa_chain.return_value = {"result": "Test answer"}
+
+     with patch('langchain.chains.RetrievalQA.from_chain_type', return_value=mock_qa_chain):
+         result = answer_question("test question", "test_db")
+
+     assert len(result) == 2
+     assert result[0]["role"] == "user"
+     assert result[0]["content"] == "test question"
+     assert result[1]["role"] == "assistant"
+     assert result[1]["content"] == "Test answer"
+
+ @patch('os.path.exists', return_value=True)
+ def test_error_handling(mock_exists, mock_embeddings):
+     mock_embeddings.side_effect = Exception("Test error")
+
+     result = answer_question("test question", "test_db")
+
+     assert len(result) == 2
+     assert result[0]["role"] == "user"
+     assert result[0]["content"] == "test question"
+     assert result[1]["role"] == "assistant"
+     assert "Si è verificato un errore: Test error" in result[1]["content"]
app_3.py ADDED
@@ -0,0 +1,275 @@
+ import gradio as gr
+ from app.document_handling import *
+ from app.llm_handling import answer_question
+ from app.logging_config import configure_logging
+
+ configure_logging()
+
+ def update_dropdowns():
+     """Refresh every dropdown with the up-to-date list of databases"""
+     databases = list_databases()
+     return [gr.update(choices=databases) for _ in range(6)]
+
+ def extract_text_from_files(files):
+     text = ""
+     for file in files:
+         try:
+             if file.name.endswith('.pdf'):
+                 text += extract_text_from_pdf(file.name)
+             elif file.name.endswith('.docx'):
+                 text += extract_text_from_docx(file.name)
+             else:
+                 with open(file.name, 'r', encoding='utf-8') as f:
+                     text += f.read()
+         except Exception as e:
+             logging.error(f"Errore durante la lettura del file {file.name}: {e}")
+     return text
+
+ with gr.Blocks() as rag_chatbot:
+     gr.Markdown("# Chatbot basato su RAG")
+
+     databases = list_databases()
+
+     # Define the dropdown states before they are used
+     db_name_upload = gr.State()
+     db_name_list = gr.State()
+     db_name_chat = gr.State()
+     db_name_new = gr.State()
+     modify_db_old_name = gr.State()
+     delete_db_dropdown = gr.State()
+
+
+     with gr.Tab("Chatbot"):
+         with gr.Row():
+             with gr.Column(scale=2):
+                 db_name_chat = gr.Dropdown(choices=databases, label="Seleziona Database", value="default_db")
+                 # Updated chatbot type
+                 chatbot = gr.Chatbot(label="Conversazione", type="messages")
+
+                 with gr.Row():
+                     # File upload added directly in the chat
+                     file_input = gr.File(
+                         label="Carica PDF per la conversazione",
+                         file_types=[".pdf", ".docx", ".txt"],
+                         file_count="multiple"
+                     )
+                     upload_button = gr.Button("Carica Documenti")
+
+                 question_input = gr.Textbox(
+                     label="Fai una domanda",
+                     placeholder="Scrivi qui la tua domanda...",
+                     lines=2
+                 )
+
+                 with gr.Row():
+                     ask_button = gr.Button("Invia")
+                     clear_button = gr.Button("Pulisci Chat")
+
+         chat_state = gr.State([])
+
+         def chat_upload_and_respond(files, chat_history, db_name):
+             # Extract the text from the files
+             text = extract_text_from_files(files)
+
+             # Add the text to the chat as assistant messages (messages format)
+             chat_history.append({"role": "assistant", "content": "📄 Contenuto dei documenti caricati:"})
+             chat_history.append({"role": "assistant", "content": text})
+
+             return chat_history
+
+         def respond(message, chat_history, db_name):
+             new_messages = answer_question(message, db_name)  # returns [user, assistant] dicts
+             chat_history.extend(new_messages)
+             return "", chat_history
+
+         def clear_chat():
+             return [], []
+
+         # Events
+         upload_button.click(
+             chat_upload_and_respond,
+             inputs=[file_input, chat_state, db_name_chat],
+             outputs=[chatbot]
+         )
+
+         ask_button.click(
+             respond,
+             inputs=[question_input, chat_state, db_name_chat],
+             outputs=[question_input, chatbot]
+         )
+
+         clear_button.click(
+             clear_chat,
+             outputs=[chatbot, chat_state]
+         )
+
+     with gr.Tab("Gestione Database"):
+         gr.Markdown("## Operazioni sui Database")
+
+         with gr.Row():
+             with gr.Column():
+                 gr.Markdown("### Crea Database")
+                 db_name_input = gr.Textbox(label="Nome Nuovo Database")
+                 create_db_button = gr.Button("Crea Database")
+                 create_output = gr.Textbox(label="Stato Creazione")
+
+             with gr.Column():
+                 gr.Markdown("### Rinomina Database")
+                 modify_db_old_name = gr.Dropdown(choices=databases, label="Database da Rinominare")
+                 modify_db_new_name = gr.Textbox(label="Nuovo Nome")
+                 modify_db_button = gr.Button("Rinomina Database")
+                 modify_output = gr.Textbox(label="Stato Modifica")
+
+             with gr.Column():
+                 gr.Markdown("### Elimina Database")
+                 delete_db_dropdown = gr.Dropdown(choices=databases, label="Database da Eliminare")
+                 delete_db_button = gr.Button("Elimina Database")
+                 delete_output = gr.Textbox(label="Stato Eliminazione")
+
+         # Events for the database management buttons
+         create_db_button.click(
+             create_database,
+             inputs=db_name_input,
+             outputs=create_output
+         ).then(
+             update_dropdowns,
+             outputs=[db_name_upload, db_name_list, db_name_chat, db_name_new, modify_db_old_name, delete_db_dropdown]
+         )
+
+         modify_db_button.click(
+             modify_database,
+             inputs=[modify_db_old_name, modify_db_new_name],
+             outputs=modify_output
+         ).then(
+             update_dropdowns,
+             outputs=[db_name_upload, db_name_list, db_name_chat, db_name_new, modify_db_old_name, delete_db_dropdown]
+         )
+
+         delete_db_button.click(
+             delete_database,
+             inputs=delete_db_dropdown,
+             outputs=delete_output
+         ).then(
+             update_dropdowns,
+             outputs=[db_name_upload, db_name_list, db_name_chat, db_name_new, modify_db_old_name, delete_db_dropdown]
+         )
+
+     with gr.Tab("Gestione Documenti"):
+         with gr.Column():
+             gr.Markdown("### Carica Documenti")
+             with gr.Row():
+                 file_input = gr.File(
+                     label="Carica i tuoi documenti",
+                     file_types=[".txt", ".pdf", ".docx"],
+                     file_count="multiple"
+                 )
+                 db_name_upload = gr.Dropdown(
+                     choices=databases,
+                     label="Seleziona Database",
+                     value="default_db"
+                 )
+
+             with gr.Row():
+                 title_input = gr.Textbox(label="Titolo del documento")
+                 author_input = gr.Textbox(label="Autore")
+
+             upload_button = gr.Button("Indicizza Documenti")
+             upload_output = gr.Textbox(label="Stato Upload")
+
+         with gr.Column():
+             gr.Markdown("### Documenti nel Database")
+             db_name_list = gr.Dropdown(
+                 choices=databases,
+                 label="Seleziona Database",
+                 value="default_db"
+             )
+             list_button = gr.Button("Visualizza Files")
+             list_output = gr.Textbox(label="Files nel Database")
+             delete_file_input = gr.Textbox(label="Nome file da eliminare")
+             delete_file_button = gr.Button("Elimina File")
+             delete_file_output = gr.Textbox(label="Stato Eliminazione")
+
+         # Updated events
+         upload_button.click(
+             upload_and_index,
+             inputs=[file_input, title_input, author_input, db_name_upload],
+             outputs=upload_output
+         ).then(
+             list_indexed_files,
+             inputs=db_name_list,
+             outputs=list_output
+         )
+
+         list_button.click(
+             list_indexed_files,
+             inputs=db_name_list,
+             outputs=list_output
+         )
+
+         delete_file_button.click(
+             delete_file_from_database,
+             inputs=[delete_file_input, db_name_list],
+             outputs=delete_file_output
+         ).then(
+             list_indexed_files,
+             inputs=db_name_list,
+             outputs=list_output
+         ).then(
+             update_dropdowns,
+             outputs=[db_name_upload, db_name_list, db_name_chat, db_name_new, modify_db_old_name, delete_db_dropdown]
+         )
+
+     with gr.Tab("Visualizza Documenti Indicizzati"):
+         with gr.Column():
+             gr.Markdown("### Documenti nel Database")
+             db_name_list = gr.Dropdown(
+                 choices=databases,
+                 label="Seleziona Database",
+                 value="default_db",
+                 interactive=True
+             )
+             list_button = gr.Button("Visualizza Documenti")
+             list_output = gr.Textbox(
+                 label="Elenco Documenti",
+                 lines=10,
+                 interactive=False,
+                 value="Clicca 'Visualizza Documenti' per vedere l'elenco"
+             )
+
+         # Click event with refresh
+         list_button.click(
+             fn=list_indexed_documents,
+             inputs=[db_name_list],
+             outputs=[list_output],
+             api_name="list_docs"
+         )
+
+
+
+     # Adding a new tab for new functionalities
+     with gr.Tab("Nuove Funzionalità"):
+         gr.Markdown("## Cerca Documenti e Genera Riassunto")
+
+         db_name_new = gr.Dropdown(choices=databases, label="Seleziona Database", value="default_db")
+         search_input = gr.Textbox(label="Inserisci Termine di Ricerca")
+         search_button = gr.Button("Cerca Documenti")
+         search_output = gr.Textbox(label="Documenti Trovati")
+
+         summary_button = gr.Button("Genera Riassunto")
+         summary_output = gr.Textbox(label="Riassunto")
+
+         search_button.click(
+             search_documents,
+             inputs=[search_input, db_name_new],
+             outputs=search_output
+         )
+
+         # summary_button.click(
+         #     generate_summary,
+         #     inputs=db_name_new,
+         #     outputs=summary_output
+         # )
+
+ # App start-up
+ if __name__ == "__main__":
+     rag_chatbot.launch()
faiss_index/index.faiss ADDED
Binary file (1.58 kB)
faiss_index/index.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:407d95e0808ddf251e3fb442241edd72c47961f5a38d5546021ef205b9fdeb57
+ size 960117
faiss_index_E-learning/index.faiss ADDED
Binary file (66.1 kB)
faiss_index_E-learning/index.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a0ec4d3c22f17861b941c079acdf82d250fdafd351e9b05ab3877110a3bbdade
+ size 25352
faiss_index_E-learning/metadata.json ADDED
@@ -0,0 +1,16 @@
+ [
+   {
+     "filename": "istruzioni obiettivi di apprendimento.pdf",
+     "title": "Obiettivi",
+     "author": "Daniele",
+     "upload_date": "2024-12-31 19:21:10",
+     "chunks": 6
+   },
+   {
+     "filename": "mastery_Bloom.pdf",
+     "title": "Mastery Learingi",
+     "author": "Bloom",
+     "upload_date": "2024-12-31 20:25:00",
+     "chunks": 43
+   }
+ ]
faiss_index_default_db/index.faiss ADDED
Binary file (309 kB)
faiss_index_default_db/index.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4c797df1c4a8ddac75b4b083391220179ce5bbcd2b962b4dfbc7d960628cd0b2
+ size 107706
requirements.txt ADDED
@@ -0,0 +1,150 @@
+ aiofiles==23.2.1
+ aiohappyeyeballs==2.4.4
+ aiohttp==3.11.11
+ aiosignal==1.3.2
+ annotated-types==0.7.0
+ anyio==4.7.0
+ asgiref==3.8.1
+ async-timeout==4.0.3
+ attrs==24.3.0
+ backoff==2.2.1
+ bcrypt==4.2.1
+ build==1.2.2.post1
+ cachetools==5.5.0
+ certifi==2024.12.14
+ charset-normalizer==3.4.1
+ chroma-hnswlib==0.7.6
+ chromadb==0.6.0
+ click==8.1.8
+ coloredlogs==15.0.1
+ dataclasses-json==0.6.7
+ Deprecated==1.2.15
+ distro==1.9.0
+ durationpy==0.9
+ exceptiongroup==1.2.2
+ faiss-cpu==1.9.0.post1
+ fastapi==0.115.6
+ ffmpy==0.5.0
+ filelock==3.16.1
+ flatbuffers==24.12.23
+ frozenlist==1.5.0
+ fsspec==2024.12.0
+ google-auth==2.37.0
+ googleapis-common-protos==1.66.0
+ gradio==5.9.1
+ gradio_client==1.5.2
+ grpcio==1.68.1
+ h11==0.14.0
+ httpcore==1.0.7
+ httptools==0.6.4
+ httpx==0.28.1
+ httpx-sse==0.4.0
+ huggingface-hub==0.27.0
+ humanfriendly==10.0
+ idna==3.10
+ importlib_metadata==8.5.0
+ importlib_resources==6.4.5
+ Jinja2==3.1.5
+ jiter==0.8.2
+ joblib==1.4.2
+ jsonpatch==1.33
+ jsonpointer==3.0.0
+ kubernetes==31.0.0
+ langchain==0.3.13
+ langchain-community==0.3.13
+ langchain-core==0.3.28
+ langchain-huggingface==0.1.2
+ langchain-openai==0.2.14
+ langchain-text-splitters==0.3.4
+ langsmith==0.2.7
+ lxml==5.3.0
+ markdown-it-py==3.0.0
+ MarkupSafe==2.1.5
+ marshmallow==3.23.2
+ mdurl==0.1.2
+ mmh3==5.0.1
+ monotonic==1.6
+ mpmath==1.3.0
+ multidict==6.1.0
+ mypy-extensions==1.0.0
+ networkx==3.4.2
+ numpy==1.26.4
+ oauthlib==3.2.2
+ onnxruntime==1.20.1
+ openai==1.58.1
+ opentelemetry-api==1.29.0
+ opentelemetry-exporter-otlp-proto-common==1.29.0
+ opentelemetry-exporter-otlp-proto-grpc==1.29.0
+ opentelemetry-instrumentation==0.50b0
+ opentelemetry-instrumentation-asgi==0.50b0
+ opentelemetry-instrumentation-fastapi==0.50b0
+ opentelemetry-proto==1.29.0
+ opentelemetry-sdk==1.29.0
+ opentelemetry-semantic-conventions==0.50b0
+ opentelemetry-util-http==0.50b0
+ orjson==3.10.13
+ overrides==7.7.0
+ packaging==24.2
+ pandas==2.2.3
+ pillow==11.0.0
+ posthog==3.7.4
+ propcache==0.2.1
+ protobuf==5.29.2
+ pyasn1==0.6.1
+ pyasn1_modules==0.4.1
+ pydantic==2.10.4
+ pydantic-settings==2.7.1
+ pydantic_core==2.27.2
+ pydub==0.25.1
+ Pygments==2.18.0
+ pypdf==5.1.0
+ PyPDF2==3.0.1
+ PyPika==0.48.9
+ pyproject_hooks==1.2.0
+ python-dateutil==2.9.0.post0
+ python-docx==1.1.2
+ python-dotenv==1.0.1
+ python-multipart==0.0.20
+ pytz==2024.2
+ PyYAML==6.0.2
+ regex==2024.11.6
+ requests==2.32.3
+ requests-oauthlib==2.0.0
+ requests-toolbelt==1.0.0
+ rich==13.9.4
+ rsa==4.9
+ ruff==0.8.4
+ safehttpx==0.1.6
+ safetensors==0.4.5
+ scikit-learn==1.6.0
+ scipy==1.14.1
+ semantic-version==2.10.0
+ sentence-transformers==3.3.1
+ shellingham==1.5.4
+ six==1.17.0
+ sniffio==1.3.1
+ SQLAlchemy==2.0.36
+ starlette==0.41.3
+ sympy==1.13.1
+ tenacity==9.0.0
+ threadpoolctl==3.5.0
+ tiktoken==0.8.0
+ tokenizers==0.21.0
+ tomli==2.2.1
+ tomlkit==0.13.2
+ torch==2.5.1
+ tqdm==4.67.1
+ transformers==4.47.1
+ typer==0.15.1
+ typing-inspect==0.9.0
+ typing_extensions==4.12.2
+ tzdata==2024.2
+ urllib3==2.3.0
+ uvicorn==0.34.0
+ uvloop==0.21.0
+ watchfiles==1.0.3
+ websocket-client==1.8.0
+ websockets==14.1
+ wrapt==1.17.0
+ yarl==1.18.3
+ zipp==3.21.0