Nugh75 committed
Commit 080146c · 1 Parent(s): 2a36d42

Modularization of the program


Each feature now lives in its own file within its own folder; the program still needs further refinement. The resulting layout is sketched below.
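
For reference, the module layout after this commit, reconstructed from the imports and the files touched below (app/configs/prompts.py is inferred from the `app.configs.prompts` imports):

    app.py
    app/
        config.py
        document_handling.py
        llm_handling.py
        llm_handling2.py
        configs/
            prompts.py
        functions/
            database_handling.py
        utils/
            __init__.py
            dataclass_utils.py
            embedding_utils.py
            helpers.py
            markdowns_utils.py
            voice_utils.py
    ui/
        chatbot_tab.py
        db_management_tab.py
        document_management_tab.py
        info_tab.py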

app.py CHANGED
@@ -7,7 +7,6 @@ from app.functions.database_handling import list_databases
 from ui.chatbot_tab import create_chatbot_tab
 from ui.db_management_tab import create_db_management_tab
 from ui.document_management_tab import create_document_management_tab
-from ui.new_features_tab import create_new_features_tab
 from ui.info_tab import create_info_tab  # Importa la nuova tab
 
 # Configura il logging
@@ -39,8 +38,7 @@ def main():
     chat_refs  # Tab 4: Chatbot (ultima tab)
     doc_refs  # Tab 2: Document Management
     db_refs(dropdowns)
-    # create_db_management_tab(dropdowns)  # Tab 1: DB Management
-    create_new_features_tab()  # Tab 3: Features
+
     info_refs  # Tab 5: Info (ultima tab)
 
     rag_chatbot.launch()

app/config.py CHANGED
@@ -1,5 +1,10 @@
 import os
 from dotenv import load_dotenv
+from enum import Enum
+from openai import OpenAI
+from pathlib import Path
+
+
 
 # Carica le variabili d'ambiente dal file .env
 load_dotenv()
@@ -7,4 +12,46 @@ load_dotenv()
 # Configurazione del modello
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 if not OPENAI_API_KEY:
-    raise ValueError("OPENAI_API_KEY non trovata. Verifica il file .env")
+    raise ValueError("OPENAI_API_KEY non trovata. Verifica il file .env")
+
+class LLMType(Enum):
+    OPENAI_GPT_4O_MINI = "openai - GPT-4o-mini"
+    LOCAL_QWEN = "local - Qwen 7B"
+    LOCAL_PHI = "local - Phi-3 Mini"
+
+# Configurazione modelli
+LLM_CONFIGS = {
+    LLMType.OPENAI_GPT_4O_MINI: {
+        "client": lambda: OpenAI(api_key=OPENAI_API_KEY),
+        "model": "gpt-4-mini",
+        "base_url": None
+    },
+    LLMType.LOCAL_QWEN: {
+        "client": lambda: OpenAI(base_url="http://192.168.82.5:1234/v1", api_key="not-needed"),
+        "model": "qwen2.5-coder-7b-instruct",
+        "base_url": "http://192.168.82.5:1234/v1"
+    },
+    LLMType.LOCAL_PHI: {
+        "client": lambda: OpenAI(base_url="http://192.168.82.5:1234/v1", api_key="not-needed"),
+        "model": "phi-3.5-mini-ita",
+        "base_url": "http://192.168.82.5:1234/v1"
+    }
+}
+
+EMBEDDING_CONFIG = {
+    "model_name": "sentence-transformers/multi-qa-mpnet-base-dot-v1",
+    "chunk_size": 2000,
+    "chunk_overlap": 100,
+    "k_documents": 5,
+    "min_similarity": 0.7
+}
+
+# Aggiungi questa costante
+EMBEDDING_MODEL = "sentence-transformers/multi-qa-mpnet-base-dot-v1"
+
+# Definisci il percorso base per i database
+BASE_DB_PATH = "db"
+
+# Voci italiane edge-tts
+VOICE_USER = "it-IT-DiegoNeural"  # Voce maschile utente
+VOICE_ASSISTANT = "it-IT-ElsaNeural"  # Voce femminile assistente
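
A minimal usage sketch for the new config module (the prompt string is illustrative; `LLM_CONFIGS` stores a client factory per model so clients are built lazily):

    from app.config import LLM_CONFIGS, LLMType

    # Build the OpenAI-compatible client for the chosen model
    config = LLM_CONFIGS[LLMType.LOCAL_QWEN]
    client = config["client"]()  # for local models this points at the LM Studio server
    response = client.chat.completions.create(
        model=config["model"],
        messages=[{"role": "user", "content": "Ciao!"}],
    )
    print(response.choices[0].message.content)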
app/document_handling.py CHANGED
@@ -1,79 +1,20 @@
 import logging
 import gradio as gr
 from langchain_community.vectorstores import FAISS
-from langchain_huggingface import HuggingFaceEmbeddings
 import os
-import shutil
 import PyPDF2
 from docx import Document
 from langchain.text_splitter import RecursiveCharacterTextSplitter
-from dataclasses import dataclass
 import json
 from datetime import datetime
 from app.functions.database_handling import BASE_DB_PATH
+from langchain_community.embeddings import HuggingFaceEmbeddings
+from app.config import EMBEDDING_CONFIG, EMBEDDING_MODEL
+from app.utils.embedding_utils import get_embeddings
+from app.utils.dataclass_utils import DocumentMetadata, save_metadata
 
-# Initialize the text splitter
-text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=100)
 
 # -------------- UTILITY FUNCTIONS --------------
-@dataclass
-class DocumentMetadata:
-    """
-    Classe per gestire i metadati dei documenti.
-
-    Attributi:
-        filename (str): Nome del file originale
-        title (str): Titolo assegnato al documento
-        author (str): Autore del documento
-        upload_date (str): Data di caricamento
-        chunks (int): Numero di chunks in cui è stato diviso il documento
-    """
-    filename: str
-    title: str
-    author: str
-    upload_date: str
-    chunks: int
-
-    def to_dict(self):
-        """Converte i metadati in un dizionario per il salvataggio JSON."""
-        return {
-            "filename": self.filename,
-            "title": self.title,
-            "author": self.author,
-            "upload_date": self.upload_date,
-            "chunks": self.chunks
-        }
-
-def save_metadata(metadata_list, db_name):
-    """
-    Salva i metadati dei documenti nel database specificato.
-
-    Args:
-        metadata_list: Lista di oggetti DocumentMetadata da salvare
-        db_name: Nome del database in cui salvare i metadati
-
-    Note:
-        I metadati vengono salvati in un file JSON nella directory del database
-    """
-    db_path = os.path.join(BASE_DB_PATH, f"faiss_index_{db_name}")
-    metadata_file = os.path.join(db_path, "metadata.json")
-
-    # Crea la directory se non esiste
-    if not os.path.exists(db_path):
-        os.makedirs(db_path)
-
-    # Carica metadati esistenti se presenti
-    existing_metadata = []
-    if os.path.exists(metadata_file):
-        with open(metadata_file, 'r') as f:
-            existing_metadata = json.load(f)
-
-    # Aggiungi nuovi metadati
-    existing_metadata.extend([m.to_dict() for m in metadata_list])
-
-    # Salva il file aggiornato
-    with open(metadata_file, 'w') as f:
-        json.dump(existing_metadata, f, indent=2)
 
 def extract_text_from_pdf(file_path):
     """
@@ -108,34 +49,26 @@ def extract_text_from_docx(file_path):
         text += para.text + "\n"
     return text
 
-
-
-
-# -------------- CHATBOT TAB FUNCTIONS --------------
-def answer_question(question, db_name="default_db"):
-    db_path = os.path.join(BASE_DB_PATH, f"faiss_index_{db_name}")
-    if not os.path.exists(db_path):
-        logging.warning(f"L'indice FAISS per il database {db_name} non esiste.")
-        return "Database non trovato."
-
-    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
-    vectorstore = FAISS.load_local(db_path, embeddings, allow_dangerous_deserialization=True)
-
-    # Perform a similarity search
-    docs = vectorstore.similarity_search(question)
-
-    if not docs:
-        return "Nessun documento corrispondente alla query."
-
-    # Collect the document contents
-    results = [doc.page_content for doc in docs]
-    return "\n\n".join(results)
-
+def create_chunks(text):
+    from app.config import EMBEDDING_CONFIG
+    text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size=EMBEDDING_CONFIG["chunk_size"],
+        chunk_overlap=EMBEDDING_CONFIG["chunk_overlap"],
+        length_function=len,
+        separators=["\n\n", "\n", " ", ""]
+    )
+    return text_splitter.split_text(text)
 
 
+def create_vectorstore(texts, metadatas, db_path):
+    embeddings = get_embeddings()
+    db = FAISS.from_texts(texts, embeddings, metadatas=metadatas)
+
 
 # -------------- DOCUMENT MANAGEMENT TAB FUNCTIONS --------------
+
 def upload_and_index(files, title, author, db_name="default_db"):
     if not files:
         return "Nessun file caricato."
@@ -154,7 +87,7 @@ def upload_and_index(files, title, author, db_name="default_db"):
         with open(file.name, 'r', encoding='utf-8') as f:
             text = f.read()
 
-        chunks = text_splitter.split_text(text)
+        chunks = create_chunks(text)
 
         # Metadata per il documento
         doc_meta = DocumentMetadata(
@@ -189,7 +122,8 @@ def upload_and_index(files, title, author, db_name="default_db"):
     db_path = os.path.join(BASE_DB_PATH, f"faiss_index_{db_name}")  # Modifica qui
     os.makedirs(db_path, exist_ok=True)
 
-    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+    # Usa la funzione centralizzata invece dell'inizializzazione diretta
+    embeddings = get_embeddings()
     texts = [doc["content"] for doc in documents]
     metadatas = [{k: v for k, v in doc.items() if k != "content"} for doc in documents]
 
@@ -265,26 +199,4 @@ def delete_file_from_database(file_name, db_name="default_db"):
     except Exception as e:
        return f"Errore durante la rimozione del file: {e}"
 
-# -------------- NEW FEATURES TAB FUNCTIONS --------------
-def search_documents(query, db_name="default_db"):
-    db_path = os.path.join(BASE_DB_PATH, f"faiss_index_{db_name}")  # Modifica qui
-    if not os.path.exists(db_path):
-        logging.warning(f"L'indice FAISS per il database '{db_name}' non esiste.")
-        return "Database non trovato."
-
-    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
-    vectorstore = FAISS.load_local(db_path, embeddings, allow_dangerous_deserialization=True)
-
-    # Perform a similarity search
-    docs = vectorstore.similarity_search(query)
-
-    if not docs:
-        return "Nessun documento corrispondente alla query."
-
-    # Collect the document contents
-    results = [doc.page_content for doc in docs]
-    return "\n\n".join(results)
 
-def generate_summary(db_name="default_db"):
-    # Placeholder per la logica di summarization
-    return "This is a summary of the documents in the database."
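
A quick sketch of the new `create_chunks` helper in isolation (the sample text is illustrative; sizes come from `EMBEDDING_CONFIG`):

    from app.document_handling import create_chunks

    sample = ("Primo paragrafo. " * 200) + "\n\n" + ("Secondo paragrafo. " * 200)
    chunks = create_chunks(sample)  # 2000-char chunks, 100-char overlap, split on "\n\n", "\n", " ", ""
    print(len(chunks), max(len(c) for c in chunks))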
 
 
 
 
app/functions/database_handling.py CHANGED
@@ -3,9 +3,7 @@ import os
 import shutil
 from watchdog.observers import Observer
 from watchdog.events import FileSystemEventHandler
-
-# Definisci il percorso base per i database
-BASE_DB_PATH = "db"
+from app.config import BASE_DB_PATH
 
 # Crea la cartella db se non esiste
 if not os.path.exists(BASE_DB_PATH):
app/llm_handling.py CHANGED
@@ -1,142 +1,39 @@
+# llm_handling.py
 import logging
 import os
-import shutil
-from enum import Enum
-
-from openai import OpenAI
 from langchain_community.vectorstores import FAISS
-from langchain_community.embeddings import HuggingFaceEmbeddings
-import gradio as gr
-import asyncio
-import edge_tts
-from pathlib import Path
 import requests
 from tenacity import retry, stop_after_attempt, wait_exponential
+import json
 
-from app.config import OPENAI_API_KEY
-from app.functions.database_handling import BASE_DB_PATH  # Aggiungi questo import
+from app.config import BASE_DB_PATH  # Ensure correct import
+from app.config import LLM_CONFIGS, LLMType  # Import LLMType and LLM_CONFIGS
 from app.configs.prompts import SYSTEM_PROMPTS
-
-import json  # Prima importa json se non è già importato
+from app.utils.embedding_utils import get_embeddings
+from app.utils.voice_utils import generate_speech  # Retain import if needed
 
 logging.basicConfig(level=logging.INFO)
-local_ip="192.168.82.5:1234"
 
-class LLMType(Enum):
-    OPENAI_GPT_4O_MINI = "openai - GPT-4o-mini"
-    LOCAL_QWEN = "local - Qwen 7B"
-    LOCAL_PHI = "local - Phi-3 Mini"
-
-# Configurazione modelli
-LLM_CONFIGS = {
-    LLMType.OPENAI_GPT_4O_MINI: {
-        "client": lambda: OpenAI(api_key=OPENAI_API_KEY),
-        "model": "gpt-4-mini",
-        "base_url": None
-    },
-    LLMType.LOCAL_QWEN: {
-        "client": lambda: OpenAI(base_url="http://192.168.82.5:1234/v1", api_key="not-needed"),
-        "model": "qwen2.5-coder-7b-instruct",
-        "base_url": "http://192.168.82.5:1234/v1"
-    },
-    LLMType.LOCAL_PHI: {
-        "client": lambda: OpenAI(base_url="http://192.168.82.5:1234/v1", api_key="not-needed"),
-        "model": "phi-3.5-mini-ita",
-        "base_url": "http://192.168.82.5:1234/v1"
-    }
-}
+# =====================================
+# Functions related to LLM
+# =====================================
 
 def get_llm_client(llm_type: LLMType):
-    """Ottiene il client appropriato per il modello selezionato"""
+    """Obtains the appropriate client for the selected model"""
     config = LLM_CONFIGS.get(llm_type)
     if not config:
-        raise ValueError(f"Modello {llm_type} non supportato")
-    return config["client"](), config["model"]
+        raise ValueError(f"Model {llm_type} not supported")
+    client_class = config["client"]
+    model = config["model"]
+    client = client_class()  # Ensure no arguments are needed
+    return client, model
-
-# Voci italiane edge-tts
-VOICE_USER = "it-IT-DiegoNeural"  # Voce maschile utente
-VOICE_ASSISTANT = "it-IT-ElsaNeural"  # Voce femminile assistente
-
-async def text_to_speech(text, voice_name, output_file):
-    """Genera audio usando edge-tts"""
-    communicate = edge_tts.Communicate(text, voice_name)
-    await communicate.save(output_file)
-
-def generate_speech(text, is_user=True):
-    try:
-        # Crea directory per audio temporanei
-        audio_dir = Path("temp_audio")
-        audio_dir.mkdir(exist_ok=True)
-
-        # Seleziona voce e genera nome file
-        voice = VOICE_USER if is_user else VOICE_ASSISTANT
-        file_name = f"speech_{hash(text)}.mp3"
-        output_path = audio_dir / file_name
-
-        # Genera audio
-        asyncio.run(text_to_speech(text, voice, str(output_path)))
-        return str(output_path)
-
-    except Exception as e:
-        logging.error(f"Errore TTS: {e}")
-        return None
-
-import re
-
-def clean_markdown(text):
-    """Rimuove markdown dal testo"""
-    text = re.sub(r'```[\s\S]*?```', '', text)  # blocchi codice
-    text = re.sub(r'`.*?`', '', text)  # codice inline
-    text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text)  # link
-    text = re.sub(r'\*\*(.*?)\*\*', r'\1', text)  # bold
-    text = re.sub(r'\*(.*?)\*', r'\1', text)  # italic
-    return text.strip()
-
-def generate_chat_audio(chat_history):
-    """Genera audio della conversazione con voci alternate"""
-    try:
-        audio_files = []
-        audio_dir = Path("temp_audio")
-        audio_dir.mkdir(exist_ok=True)
-
-        # Genera audio per ogni messaggio
-        for msg in chat_history:
-            content = clean_markdown(msg["content"])
-            if not content.strip():
-                continue
-
-            voice = VOICE_USER if msg["role"] == "user" else VOICE_ASSISTANT
-            file_name = f"chat_{msg['role']}_{hash(content)}.mp3"
-            output_path = audio_dir / file_name
-
-            # Genera audio senza prefissi
-            asyncio.run(text_to_speech(content, voice, str(output_path)))
-            audio_files.append(str(output_path))
-
-        # Combina tutti gli audio
-        if audio_files:
-            from pydub import AudioSegment
-            combined = AudioSegment.empty()
-            for audio_file in audio_files:
-                segment = AudioSegment.from_mp3(audio_file)
-                combined += segment
-
-            final_path = audio_dir / f"chat_complete_{hash(str(chat_history))}.mp3"
-            combined.export(str(final_path), format="mp3")
-            return str(final_path)
-
-        return None
-
-    except Exception as e:
-        logging.error(f"Errore generazione audio: {e}")
-        return None
 
 def get_system_prompt(prompt_type="tutor"):
-    """Seleziona il prompt di sistema appropriato"""
+    """Selects the appropriate system prompt"""
     return SYSTEM_PROMPTS.get(prompt_type, SYSTEM_PROMPTS["tutor"])
 
 def test_local_connection():
-    """Verifica la connessione al server LLM locale"""
+    """Checks connection to the local LLM server"""
     try:
         response = requests.get(f"http://192.168.82.5:1234/v1/health", timeout=5)
         return response.status_code == 200
@@ -150,111 +47,108 @@ def read_metadata(db_path):
         return json.load(f)
     return []
 
+def get_relevant_documents(vectorstore, question, min_similarity=0.7):
+    """Retrieves relevant documents from the vectorstore"""
+    try:
+        enhanced_query = enhance_query(question)
+        docs_and_scores = vectorstore.similarity_search_with_score(
+            enhanced_query,
+            k=8
+        )
+        filtered_docs = [
+            doc for doc, score in docs_and_scores if score >= min_similarity
+        ]
+        logging.info(f"Query: {question}")
+        logging.info(f"Documents found: {len(filtered_docs)}")
+        return filtered_docs[:5] if filtered_docs else []
+    except Exception as e:
+        logging.error(f"Error retrieving documents: {e}")
+        return []
+
+def enhance_query(question):
+    stop_words = set(['il', 'lo', 'la', 'i', 'gli', 'le', 'un', 'uno', 'una'])
+    words = [w for w in question.lower().split() if w not in stop_words]
+    enhanced_query = " ".join(words)
+    return enhanced_query
+
+def log_search_results(question, docs_and_scores):
+    logging.info(f"Query: {question}")
+    for idx, (doc, score) in enumerate(docs_and_scores, 1):
+        logging.info(f"Doc {idx} - Score: {score:.4f}")
+        logging.info(f"Content: {doc.page_content[:100]}...")
+
-@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
-def answer_question(question, db_name, prompt_type="tutor", chat_history=None, llm_type=LLMType.OPENAI_GPT_4O_MINI):
-    """
-    Risponde alla domanda 'question' usando i documenti del database 'db_name'.
-    Restituisce una lista di 2 messaggi in formato:
-    [
-        {"role": "user", "content": <domanda>},
-        {"role": "assistant", "content": <risposta>}
-    ]
-
-    In questa versione, viene effettuato il log dei 'chunk' recuperati durante
-    la ricerca di similarità.
-    """
+@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
+def answer_question(question, db_name, prompt_type="tutor", chat_history=None, llm_type=None):
     if chat_history is None:
         chat_history = []
-
-    logging.info(f"Inizio elaborazione domanda: {question} per database: {db_name}")
-
     try:
-        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
-        db_path = os.path.join(BASE_DB_PATH, f"faiss_index_{db_name}")  # Percorso corretto
-
-        logging.info(f"Verifico esistenza database in: {db_path}")
-        if not os.path.exists(db_path):
-            logging.warning(f"Database {db_name} non trovato in {db_path}")
+        embeddings = get_embeddings()
+        db_path = os.path.join(BASE_DB_PATH, f"faiss_index_{db_name}")
+        metadata_list = read_metadata(db_path)
+        metadata_dict = {m["filename"]: m for m in metadata_list}
+        vectorstore = FAISS.load_local(db_path, embeddings, allow_dangerous_deserialization=True)
+        relevant_docs = get_relevant_documents(vectorstore, question)
+        if not relevant_docs:
             return [
                 {"role": "user", "content": question},
-                {"role": "assistant", "content": f"Database non trovato in {db_path}"}
+                {"role": "assistant", "content": "Sorry, no relevant information found to answer your question. Try rephrasing or asking a different question."}
             ]
-
-        # Carica l'indice FAISS
-        vectorstore = FAISS.load_local(db_path, embeddings, allow_dangerous_deserialization=True)
-
-        # Cerca i documenti (chunk) più simili
-        relevant_docs = vectorstore.similarity_search(question, k=5)
-
-        metadata_list = read_metadata(db_path)
-        metadata_dict = {m["filename"]: m for m in metadata_list}
-
-        # Logga i chunk recuperati con metadata
-        for idx, doc in enumerate(relevant_docs):
-            logging.info(f"--- Chunk {idx+1} ---")
+        sources = []
+        for idx, doc in enumerate(relevant_docs, 1):
             source_file = doc.metadata.get("source", "Unknown")
-
-            # Recupera i metadata dal file json
             if source_file in metadata_dict:
-                file_metadata = metadata_dict[source_file]
-                logging.info(f"📚 Titolo: {file_metadata['title']}")
-                logging.info(f"✍️ Autore: {file_metadata['author']}")
-
-            logging.info(f"📄 Contenuto:")
-            logging.info(doc.page_content)
-            logging.info("---------------------")
-
-        # Prepara il contesto dai documenti
-        context = "\n".join([doc.page_content for doc in relevant_docs])
+                meta = metadata_dict[source_file]
+                sources.append(f"📚 {meta['title']} (Author: {meta['author']}) - Part {idx} of {len(relevant_docs)}")
+        context = "\n".join([
+            f"[Part {idx+1} of {len(relevant_docs)}]\n{doc.page_content}"
+            for idx, doc in enumerate(relevant_docs)
+        ])
+        sources_text = "\n\nSources consulted:\n" + "\n".join(set(sources))
         prompt = SYSTEM_PROMPTS[prompt_type].format(context=context)
-
-        # Prepara la cronologia completa delle conversazioni
-        conversation_history = []
-        for msg in chat_history:  # Rimuovo limite di 4 messaggi
-            conversation_history.append({
-                "role": msg["role"],
-                "content": msg["content"]
-            })
-
-        # Costruisci messaggio con contesto completo
+        prompt += "\nAlways cite the sources used for your response, including the document title and author."
         messages = [
            {"role": "system", "content": prompt},
-            *conversation_history,  # Includi tutta la cronologia
+            *[{"role": m["role"], "content": m["content"]} for m in chat_history],
            {"role": "user", "content": question}
        ]
-
-        if "local" in str(llm_type):
-            if not test_local_connection():
-                raise ConnectionError("LM Studio non raggiungibile")
-
        client, model = get_llm_client(llm_type)
        response = client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=0.7,
-            max_tokens=2048  # Aumenta token per gestire conversazioni lunghe
+            max_tokens=2048
        )
-        answer = response.choices[0].message.content
-
-        # Genera audio per domanda e risposta
-        user_audio = generate_speech(question, is_user=True)
-        assistant_audio = generate_speech(answer, is_user=False)
-
+        answer = response.choices[0].message.content + sources_text
        return [
-            {"role": "user", "content": question, "audio": user_audio},
-            {"role": "assistant", "content": answer, "audio": assistant_audio}
+            {"role": "user", "content": question},
+            {"role": "assistant", "content": answer}
        ]
-
     except Exception as e:
-        logging.error(f"Errore durante la generazione della risposta: {e}")
-        error_msg = "LLM locale non disponibile. Riprova più tardi o usa OpenAI." if "local" in str(llm_type) else str(e)
+        logging.error(f"Error generating response: {e}")
+        error_msg = "Local LLM not available. Try again later or use OpenAI." if "local" in str(llm_type) else str(e)
        return [
            {"role": "user", "content": question},
            {"role": "assistant", "content": f"⚠️ {error_msg}"}
        ]
 
+class DocumentRetriever:
+    def __init__(self, db_path):
+        self.embeddings = get_embeddings()
+        self.vectorstore = FAISS.load_local(
+            db_path,
+            self.embeddings,
+            allow_dangerous_deserialization=True
+        )
+
+    def get_relevant_chunks(self, question):
+        enhanced_query = enhance_query(question)
+        docs_and_scores = self.vectorstore.similarity_search_with_score(
+            enhanced_query,
+            k=8
+        )
+        log_search_results(question, docs_and_scores)
+        # Implement _filter_relevant_docs or remove the call
+        # return self._filter_relevant_docs(docs_and_scores)
 
 if __name__ == "__main__":
-
-    pass
+    pass
app/llm_handling2.py ADDED
@@ -0,0 +1,188 @@
+# llm_handling.py
+import logging
+import os
+from langchain_community.vectorstores import FAISS
+import requests
+from tenacity import retry, stop_after_attempt, wait_exponential
+import json
+
+from app.config import *
+from app.configs.prompts import SYSTEM_PROMPTS
+from app.utils.embedding_utils import get_embeddings
+from app.utils.voice_utils import generate_speech
+
+logging.basicConfig(level=logging.INFO)
+
+# =====================================
+# Funzioni relative al LLM
+# =====================================
+
+def get_llm_client(llm_type: LLMType):
+    """Ottiene il client appropriato per il modello selezionato"""
+    config = LLM_CONFIGS.get(llm_type)
+    if not config:
+        raise ValueError(f"Modello {llm_type} non supportato")
+    return config["client"](), config["model"]
+
+def get_system_prompt(prompt_type="tutor"):
+    """Seleziona il prompt di sistema appropriato"""
+    return SYSTEM_PROMPTS.get(prompt_type, SYSTEM_PROMPTS["tutor"])
+
+def test_local_connection():
+    """Verifica la connessione al server LLM locale"""
+    try:
+        response = requests.get(f"http://192.168.82.5:1234/v1/health", timeout=5)
+        return response.status_code == 200
+    except:
+        return False
+
+def read_metadata(db_path):
+    metadata_file = os.path.join(db_path, "metadata.json")
+    if os.path.exists(metadata_file):
+        with open(metadata_file, 'r') as f:
+            return json.load(f)
+    return []
+
+def get_relevant_documents(vectorstore, question, min_similarity=0.7):
+    """Recupera i documenti rilevanti dal vectorstore"""
+    try:
+        # Migliora la query prima della ricerca
+        enhanced_query = enhance_query(question)
+
+        # Ottieni documenti con punteggi di similarità
+        docs_and_scores = vectorstore.similarity_search_with_score(
+            enhanced_query,
+            k=8  # Aumenta il numero di documenti recuperati
+        )
+
+        # Filtra i documenti per similarità
+        filtered_docs = [
+            doc for doc, score in docs_and_scores
+            if score >= min_similarity
+        ]
+
+        # Log dei risultati per debug
+        logging.info(f"Query: {question}")
+        logging.info(f"Documenti trovati: {len(filtered_docs)}")
+
+        # Restituisci almeno un documento o una lista vuota
+        return filtered_docs[:5] if filtered_docs else []
+
+    except Exception as e:
+        logging.error(f"Errore nel recupero dei documenti: {e}")
+        return []  # Restituisce lista vuota invece di None
+
+def enhance_query(question):
+    # Rimuovi parole non significative
+    stop_words = set(['il', 'lo', 'la', 'i', 'gli', 'le', 'un', 'uno', 'una'])
+    words = [w for w in question.lower().split() if w not in stop_words]
+
+    # Estrai keywords chiave
+    enhanced_query = " ".join(words)
+    return enhanced_query
+
+def log_search_results(question, docs_and_scores):
+    logging.info(f"Query: {question}")
+    for idx, (doc, score) in enumerate(docs_and_scores, 1):
+        logging.info(f"Doc {idx} - Score: {score:.4f}")
+        logging.info(f"Content: {doc.page_content[:100]}...")
+
+@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
+def answer_question(question, db_name, prompt_type="tutor", chat_history=None, llm_type=LLMType.OPENAI_GPT_4O_MINI):
+    if chat_history is None:
+        chat_history = []
+
+    try:
+        embeddings = get_embeddings()  # Usa la funzione comune
+        db_path = os.path.join(BASE_DB_PATH, f"faiss_index_{db_name}")
+
+        # Leggi i metadati
+        metadata_list = read_metadata(db_path)
+        metadata_dict = {m["filename"]: m for m in metadata_list}
+
+        # Recupera i documenti rilevanti
+        vectorstore = FAISS.load_local(db_path, embeddings, allow_dangerous_deserialization=True)
+        relevant_docs = get_relevant_documents(vectorstore, question)
+
+        if not relevant_docs:
+            return [
+                {"role": "user", "content": question},
+                {"role": "assistant", "content": "Mi dispiace, non ho trovato informazioni rilevanti per rispondere alla tua domanda. Prova a riformularla o a fare una domanda diversa."}
+            ]
+
+        # Prepara le citazioni delle fonti con numerazione dei chunk
+        sources = []
+        for idx, doc in enumerate(relevant_docs, 1):
+            source_file = doc.metadata.get("source", "Unknown")
+            if source_file in metadata_dict:
+                meta = metadata_dict[source_file]
+                sources.append(f"📚 {meta['title']} (Autore: {meta['author']}) - Parte {idx} di {len(relevant_docs)}")
+
+        # Prepara il contesto con le fonti
+        context = "\n".join([
+            f"[Parte {idx+1} di {len(relevant_docs)}]\n{doc.page_content}"
+            for idx, doc in enumerate(relevant_docs)
+        ])
+        sources_text = "\n\nFonti consultate:\n" + "\n".join(set(sources))
+
+        # Aggiorna il prompt per includere la richiesta di citare le fonti
+        prompt = SYSTEM_PROMPTS[prompt_type].format(context=context)
+        prompt += "\nCita sempre le fonti utilizzate per la tua risposta includendo il titolo del documento e l'autore."
+
+        # Costruisci il messaggio completo
+        messages = [
+            {"role": "system", "content": prompt},
+            *[{"role": m["role"], "content": m["content"]} for m in chat_history],
+            {"role": "user", "content": question}
+        ]
+
+        # Ottieni la risposta dall'LLM
+        client, model = get_llm_client(llm_type)
+        response = client.chat.completions.create(
+            model=model,
+            messages=messages,
+            temperature=0.7,
+            max_tokens=2048
+        )
+        answer = response.choices[0].message.content + sources_text
+
+        # return [
+        #     {"role": "user", "content": question, "audio": user_audio},
+        #     {"role": "assistant", "content": answer, "audio": assistant_audio}
+        # ]
+
+    except Exception as e:
+        logging.error(f"Errore durante la generazione della risposta: {e}")
+        error_msg = "LLM locale non disponibile. Riprova più tardi o usa OpenAI." if "local" in str(llm_type) else str(e)
+        return [
+            {"role": "user", "content": question},
+            {"role": "assistant", "content": f"⚠️ {error_msg}"}
+        ]
+
+class DocumentRetriever:
+    def __init__(self, db_path):
+        self.embeddings = get_embeddings()
+        self.vectorstore = FAISS.load_local(
+            db_path,
+            self.embeddings,
+            allow_dangerous_deserialization=True
+        )
+
+    def get_relevant_chunks(self, question):
+        enhanced_query = enhance_query(question)
+        docs_and_scores = self.vectorstore.similarity_search_with_score(
+            enhanced_query,
+            k=8
+        )
+
+        log_search_results(question, docs_and_scores)
+        return self._filter_relevant_docs(docs_and_scores)
+
+
+if __name__ == "__main__":
+    pass
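
enhance_query only strips a handful of Italian articles before the vector search; a quick illustration (the input string is made up):

    >>> enhance_query("Quali sono gli obiettivi del corso?")
    'quali sono obiettivi del corso?'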
{utils → app/utils}/__init__.py RENAMED
File without changes
app/utils/dataclass_utils.py ADDED
@@ -0,0 +1,59 @@
+import os
+import json
+from dataclasses import dataclass
+from app.functions.database_handling import BASE_DB_PATH
+
+@dataclass
+class DocumentMetadata:
+    """
+    Classe per gestire i metadati dei documenti.
+
+    Attributi:
+        filename (str): Nome del file originale
+        title (str): Titolo assegnato al documento
+        author (str): Autore del documento
+        upload_date (str): Data di caricamento
+        chunks (int): Numero di chunks in cui è stato diviso il documento
+    """
+    filename: str
+    title: str
+    author: str
+    upload_date: str
+    chunks: int
+
+    def to_dict(self):
+        """Converte i metadati in un dizionario per il salvataggio JSON."""
+        return {
+            "filename": self.filename,
+            "title": self.title,
+            "author": self.author,
+            "upload_date": self.upload_date,
+            "chunks": self.chunks
+        }
+
+def save_metadata(metadata_list, db_name):
+    """
+    Salva i metadati dei documenti nel database specificato.
+
+    Args:
+        metadata_list: Lista di oggetti DocumentMetadata da salvare
+        db_name: Nome del database in cui salvare i metadati
+
+    Note:
+        I metadati vengono salvati in un file JSON nella directory del database
+    """
+    db_path = os.path.join(BASE_DB_PATH, f"faiss_index_{db_name}")
+    metadata_file = os.path.join(db_path, "metadata.json")
+
+    if not os.path.exists(db_path):
+        os.makedirs(db_path)
+
+    existing_metadata = []
+    if os.path.exists(metadata_file):
+        with open(metadata_file, 'r') as f:
+            existing_metadata = json.load(f)
+
+    existing_metadata.extend([m.to_dict() for m in metadata_list])
+
+    with open(metadata_file, 'w') as f:
+        json.dump(existing_metadata, f, indent=2)
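
A short sketch of the extracted metadata helpers (field values are illustrative):

    from datetime import datetime
    from app.utils.dataclass_utils import DocumentMetadata, save_metadata

    meta = DocumentMetadata(
        filename="esempio.pdf",
        title="Esempio",
        author="Autore",
        upload_date=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        chunks=12,
    )
    save_metadata([meta], "default_db")  # appends to db/faiss_index_default_db/metadata.json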
app/utils/embedding_utils.py ADDED
@@ -0,0 +1,12 @@
+import torch
+from langchain_community.embeddings import HuggingFaceEmbeddings
+from app.config import EMBEDDING_CONFIG
+
+def get_embeddings():
+    """Inizializza gli embeddings usando il modello configurato"""
+    device = 'cuda' if torch.cuda.is_available() else 'cpu'
+    return HuggingFaceEmbeddings(
+        model_name=EMBEDDING_CONFIG["model_name"],
+        model_kwargs={'device': device}
+    )
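
Every module now obtains embeddings through this single helper, so the model is configured in one place; for example:

    from app.utils.embedding_utils import get_embeddings

    embeddings = get_embeddings()  # multi-qa-mpnet-base-dot-v1, on CUDA when available
    vector = embeddings.embed_query("una frase di prova")
    print(len(vector))  # 768 dimensions for this model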
{utils → app/utils}/helpers.py RENAMED
File without changes
app/utils/markdowns_utils.py ADDED
@@ -0,0 +1,14 @@
+import re
+
+# =====================================
+# Funzioni relative al Markdown
+# =====================================
+
+def clean_markdown(text):
+    """Rimuove markdown dal testo"""
+    text = re.sub(r'```[\s\S]*?```', '', text)  # blocchi codice
+    text = re.sub(r'`.*?`', '', text)  # codice inline
+    text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text)  # link
+    text = re.sub(r'\*\*(.*?)\*\*', r'\1', text)  # bold
+    text = re.sub(r'\*(.*?)\*', r'\1', text)  # italic
+    return text.strip()
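
clean_markdown strips formatting before text-to-speech; a quick example (the sample string is made up):

    from app.utils.markdowns_utils import clean_markdown

    print(clean_markdown("Vedi **questo** [link](https://example.com) e `codice`."))
    # -> "Vedi questo link e ."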
app/utils/voice_utils.py ADDED
@@ -0,0 +1,71 @@
+import logging
+import asyncio
+import edge_tts
+from app.config import VOICE_USER, VOICE_ASSISTANT
+from pathlib import Path
+from app.utils.markdowns_utils import clean_markdown
+
+
+async def text_to_speech(text, voice_name, output_file):
+    """Genera audio usando edge-tts"""
+    communicate = edge_tts.Communicate(text, voice_name)
+    await communicate.save(output_file)
+
+def generate_speech(text, is_user=True):
+    try:
+        # Crea directory per audio temporanei
+        audio_dir = Path("temp_audio")
+        audio_dir.mkdir(exist_ok=True)
+
+        # Seleziona voce e genera nome file
+        voice = VOICE_USER if is_user else VOICE_ASSISTANT
+        file_name = f"speech_{hash(text)}.mp3"
+        output_path = audio_dir / file_name
+
+        # Genera audio
+        asyncio.run(text_to_speech(text, voice, str(output_path)))
+        return str(output_path)
+
+    except Exception as e:
+        logging.error(f"Errore TTS: {e}")
+        return None
+
+def generate_chat_audio(chat_history):
+    """Genera audio della conversazione con voci alternate"""
+    try:
+        audio_files = []
+        audio_dir = Path("temp_audio")
+        audio_dir.mkdir(exist_ok=True)
+
+        # Genera audio per ogni messaggio
+        for msg in chat_history:
+            content = clean_markdown(msg["content"])
+            if not content.strip():
+                continue
+
+            voice = VOICE_USER if msg["role"] == "user" else VOICE_ASSISTANT
+            file_name = f"chat_{msg['role']}_{hash(content)}.mp3"
+            output_path = audio_dir / file_name
+
+            # Genera audio senza prefissi
+            asyncio.run(text_to_speech(content, voice, str(output_path)))
+            audio_files.append(str(output_path))
+
+        # Combina tutti gli audio
+        if audio_files:
+            from pydub import AudioSegment
+
+            combined = AudioSegment.empty()
+            for audio_file in audio_files:
+                segment = AudioSegment.from_mp3(audio_file)
+                combined += segment
+
+            final_path = audio_dir / f"chat_complete_{hash(str(chat_history))}.mp3"
+            combined.export(str(final_path), format="mp3")
+            return str(final_path)
+
+        return None
+
+    except Exception as e:
+        logging.error(f"Errore generazione audio: {e}")
+        return None
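
A usage sketch for the relocated TTS helpers (needs network access for edge-tts and ffmpeg for pydub; the chat content is illustrative):

    from app.utils.voice_utils import generate_chat_audio

    chat = [
        {"role": "user", "content": "Ciao, come funziona il corso?"},
        {"role": "assistant", "content": "Il corso è diviso in moduli."},
    ]
    audio_path = generate_chat_audio(chat)  # alternates Diego/Elsa voices, merges segments via pydub
    print(audio_path)  # e.g. temp_audio/chat_complete_<hash>.mp3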
db/.DS_Store CHANGED
Binary files a/db/.DS_Store and b/db/.DS_Store differ

db/faiss_index/index.faiss DELETED
Binary file (1.58 kB)

db/faiss_index/index.pkl DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:407d95e0808ddf251e3fb442241edd72c47961f5a38d5546021ef205b9fdeb57
-size 960117

db/faiss_index_Daniele2/.DS_Store DELETED
Binary file (6.15 kB)

db/faiss_index_Daniele2/index.faiss DELETED
Binary file (3.12 kB)

db/faiss_index_Daniele2/index.pkl DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:e79bcca55b5153ea71218a3d2204c01ec1eccf59162fd4547d19956a4750d04e
-size 2958

db/faiss_index_Daniele2/metadata.json DELETED
@@ -1,9 +0,0 @@
-[
-    {
-        "filename": "istruzioni obiettivi di apprendimento.pdf",
-        "title": "Obiettivi di apprendimento",
-        "author": "Daniele",
-        "upload_date": "2025-01-02 15:14:19",
-        "chunks": 2
-    }
-]

db/faiss_index_Orienta/index.faiss DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d4ffbc57fcbef553e507d44c7708b4e23b947f5af97c13d97359f3d814fc562a
-size 2362413

db/faiss_index_Orienta/index.pkl DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:a89e4e492e28b30ef9ede1fd176dfcdac5e973884a56ab9d217a695136be8349
-size 3303433

db/faiss_index_Orienta/metadata.json DELETED
@@ -1,9 +0,0 @@
-[
-    {
-        "filename": "Imparare a dirigere se stessi.pdf",
-        "title": "Imparare a dirigere se stessi ",
-        "author": "Pellerey",
-        "upload_date": "2025-01-02 22:47:28",
-        "chunks": 1538
-    }
-]

db/faiss_index_default_db/index.faiss DELETED
Binary file (309 kB)

db/faiss_index_default_db/index.pkl DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:4c797df1c4a8ddac75b4b083391220179ce5bbcd2b962b4dfbc7d960628cd0b2
-size 107706

ui/chatbot_tab.py CHANGED
@@ -4,12 +4,13 @@ import logging
 import gradio as gr
 from app.functions.database_handling import list_databases
 from app.configs.prompts import SYSTEM_PROMPTS
-from app.llm_handling import answer_question, LLMType, generate_chat_audio
-from utils.helpers import extract_text_from_files
-
-logging.basicConfig(level=logging.INFO)
+from app.llm_handling import answer_question, LLMType
+from app.utils.helpers import extract_text_from_files
+from app.utils.voice_utils import *
+from app.utils.markdowns_utils import clean_markdown
 
 
+logging.basicConfig(level=logging.INFO)
 
 
 def create_chatbot_tab():
@@ -87,26 +88,26 @@ def create_chatbot_tab():
 
         return str(Path(temp_path).absolute())
 
-    def download_audio(chat_history):
-        """Scarica l'ultimo messaggio audio dalla chat"""
-        try:
-            if not chat_history:
-                gr.Warning("Nessun messaggio nella chat")
-                return None
+    # def download_audio(chat_history):
+    #     """Scarica l'ultimo messaggio audio dalla chat"""
+    #     try:
+    #         if not chat_history:
+    #             gr.Warning("Nessun messaggio nella chat")
+    #             return None
 
-            # Prendi l'ultimo messaggio assistant
-            for msg in reversed(chat_history):
-                if msg["role"] == "assistant" and "audio" in msg:
-                    audio_path = msg["audio"]
-                    if audio_path and os.path.exists(audio_path):
-                        return audio_path
+    #         # Prendi l'ultimo messaggio assistant
+    #         for msg in reversed(chat_history):
+    #             if msg["role"] == "assistant" and "audio" in msg:
+    #                 audio_path = msg["audio"]
+    #                 if audio_path and os.path.exists(audio_path):
+    #                     return audio_path
 
-            gr.Warning("Nessun audio disponibile per l'ultima risposta")
-            return None
+    #         gr.Warning("Nessun audio disponibile per l'ultima risposta")
+    #         return None
 
-        except Exception as e:
-            gr.Error(f"Errore durante il download dell'audio: {str(e)}")
-            return None
+    #     except Exception as e:
+    #         gr.Error(f"Errore durante il download dell'audio: {str(e)}")
+    #         return None
 
     def format_conversation_for_audio(chat_history):
         """Formatta la conversazione per la sintesi vocale"""
@@ -116,25 +117,25 @@ def create_chatbot_tab():
             audio_text.append(f"{role} dice: {msg['content']}")
         return "\n".join(audio_text)
 
-    def generate_conversation_audio(chat_history):
-        """Genera audio della conversazione completa"""
-        try:
-            if not chat_history:
-                gr.Warning("Nessun messaggio nella chat")
-                return None
+    # def generate_conversation_audio(chat_history):
+    #     """Genera audio della conversazione completa"""
+    #     try:
+    #         if not chat_history:
+    #             gr.Warning("Nessun messaggio nella chat")
+    #             return None
 
-            conversation_text = format_conversation_for_audio(chat_history)
-            audio_path = generate_speech(conversation_text, is_user=False)
+    #         conversation_text = format_conversation_for_audio(chat_history)
+    #         audio_path = generate_speech(conversation_text, is_user=False)
 
-            if audio_path and os.path.exists(audio_path):
-                return audio_path
-            else:
-                gr.Warning("Errore nella generazione dell'audio")
-                return None
+    #         if audio_path and os.path.exists(audio_path):
+    #             return audio_path
+    #         else:
+    #             gr.Warning("Errore nella generazione dell'audio")
+    #             return None
 
-        except Exception as e:
-            gr.Error(f"Errore: {str(e)}")
-            return None
+    #     except Exception as e:
+    #         gr.Error(f"Errore: {str(e)}")
+    #         return None
 
     def convert_chat_to_audio(chat_history):
         if not chat_history:
ui/new_features_tab.py DELETED
@@ -1,43 +0,0 @@
-# ui/new_features_tab.py
-
-import gradio as gr
-from app.document_handling import search_documents
-from app.functions.database_handling import list_databases
-
-def create_new_features_tab():
-    """Crea il tab 'Nuove Funzionalità' dell'interfaccia Gradio."""
-
-    def search_documents_callback(query, db_name):
-        """Cerca documenti nel database in base alla query."""
-        results = search_documents(query, db_name)
-        return "\n".join(results)
-
-    # Ottieni la lista dei database
-    databases = list_databases()
-
-    with gr.Tab("Nuove Funzionalità"):
-        gr.Markdown("## Cerca Documenti e Genera Riassunto")
-
-        db_name_new = gr.Dropdown(choices=databases, label="Seleziona Database", value="default_db")
-        search_input = gr.Textbox(label="Inserisci Termini di Ricerca")
-        search_button = gr.Button("Cerca Documenti")
-        search_output = gr.Textbox(label="Documenti Trovati")
-
-        summary_button = gr.Button("Genera Riassunto")
-        summary_output = gr.Textbox(label="Riassunto")
-
-        # Evento per il bottone di ricerca
-        search_button.click(
-            search_documents_callback,
-            inputs=[search_input, db_name_new],
-            outputs=search_output
-        )
-
-        # Evento per il bottone di generazione riassunto (implementare generate_summary se necessario)
-        # summary_button.click(
-        #     generate_summary,
-        #     inputs=db_name_new,
-        #     outputs=summary_output
-        # )
-
-    return