# app/llm_handling.py
import asyncio
import logging
import os
import re
from enum import Enum
from pathlib import Path

import edge_tts
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from openai import OpenAI

from app.config import OPENAI_API_KEY
from app.configs.prompts import SYSTEM_PROMPTS
from app.functions.database_handling import BASE_DB_PATH  # base path for the FAISS databases

logging.basicConfig(level=logging.INFO)
class LLMType(Enum):
OPENAI = "openai"
LOCAL = "local"
# Standard OpenAI client
openai_client = OpenAI(api_key=OPENAI_API_KEY)
# Local LM Studio client
local_client = OpenAI(
base_url="http://192.168.140.5:1234/v1",
api_key="not-needed"
)
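# NOTE: the base_url above points at an LM Studio server on the local network;
# adjust host/port for your own setup (the api_key value is a placeholder,
# since the local server does not require one).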
# Italian edge-tts voices
VOICE_USER = "it-IT-DiegoNeural"          # male voice for the user
VOICE_ASSISTANT = "it-IT-ElsaNeural"      # female voice for the assistant
async def text_to_speech(text, voice_name, output_file):
    """Generate an audio file from text using edge-tts."""
    communicate = edge_tts.Communicate(text, voice_name)
    await communicate.save(output_file)
def generate_speech(text, is_user=True):
    """Generate a single speech clip and return its path (None on failure)."""
    try:
        # Create the directory for temporary audio files
        audio_dir = Path("temp_audio")
        audio_dir.mkdir(exist_ok=True)
        # Pick the voice and build the file name
        voice = VOICE_USER if is_user else VOICE_ASSISTANT
        file_name = f"speech_{hash(text)}.mp3"
        output_path = audio_dir / file_name
        # Generate the audio
        asyncio.run(text_to_speech(text, voice, str(output_path)))
        return str(output_path)
    except Exception as e:
        logging.error(f"TTS error: {e}")
        return None
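# Example (illustrative): generate a clip with the assistant voice.
#   path = generate_speech("Benvenuto!", is_user=False)
#   # -> "temp_audio/speech_<hash>.mp3", or None if edge-tts failed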
def clean_markdown(text):
    """Strip Markdown syntax from text before it is sent to TTS."""
    text = re.sub(r'```[\s\S]*?```', '', text)              # code blocks
    text = re.sub(r'`.*?`', '', text)                       # inline code
    text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text)   # links
    text = re.sub(r'\*\*(.*?)\*\*', r'\1', text)            # bold
    text = re.sub(r'\*(.*?)\*', r'\1', text)                # italic
    return text.strip()
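# Example (illustrative):
#   clean_markdown("**Bold** and [a link](https://example.com)")
#   -> "Bold and a link"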
def generate_chat_audio(chat_history):
    """Generate one audio file for the whole conversation, alternating voices."""
    try:
        audio_files = []
        audio_dir = Path("temp_audio")
        audio_dir.mkdir(exist_ok=True)
        # Generate a clip for each message
        for msg in chat_history:
            content = clean_markdown(msg["content"])
            if not content.strip():
                continue
            voice = VOICE_USER if msg["role"] == "user" else VOICE_ASSISTANT
            file_name = f"chat_{msg['role']}_{hash(content)}.mp3"
            output_path = audio_dir / file_name
            # Generate the audio without any role prefix
            asyncio.run(text_to_speech(content, voice, str(output_path)))
            audio_files.append(str(output_path))
        # Concatenate all clips into a single file
        if audio_files:
            from pydub import AudioSegment  # lazy import: pydub needs ffmpeg at runtime
            combined = AudioSegment.empty()
            for audio_file in audio_files:
                segment = AudioSegment.from_mp3(audio_file)
                combined += segment
            final_path = audio_dir / f"chat_complete_{hash(str(chat_history))}.mp3"
            combined.export(str(final_path), format="mp3")
            return str(final_path)
        return None
    except Exception as e:
        logging.error(f"Audio generation error: {e}")
        return None
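# Example (illustrative): history entries only need "role" and "content".
#   generate_chat_audio([
#       {"role": "user", "content": "Ciao!"},
#       {"role": "assistant", "content": "Ciao, come posso aiutarti?"},
#   ])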
def get_system_prompt(prompt_type="tutor"):
    """Return the requested system prompt, falling back to 'tutor'."""
    return SYSTEM_PROMPTS.get(prompt_type, SYSTEM_PROMPTS["tutor"])
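# Example (illustrative): prompts are assumed to be templates with a {context}
# placeholder (see their use in answer_question below), e.g.:
#   system_prompt = get_system_prompt("tutor").format(context="...retrieved chunks...")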
def answer_question(question, db_name, prompt_type="tutor", chat_history=None, llm_type=LLMType.OPENAI):
    """
    Answer 'question' using the documents in the 'db_name' database.
    Returns a list of two messages in the format:
    [
        {"role": "user", "content": <question>},
        {"role": "assistant", "content": <answer>}
    ]
    This version also logs the chunks retrieved during the similarity search.
    """
    if chat_history is None:
        chat_history = []
    logging.info(f"Processing question: {question} for database: {db_name}")
    try:
        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        db_path = os.path.join(BASE_DB_PATH, f"faiss_index_{db_name}")  # index path under BASE_DB_PATH
        logging.info(f"Checking for database at: {db_path}")
        if not os.path.exists(db_path):
            logging.warning(f"Database {db_name} not found at {db_path}")
            return [
                {"role": "user", "content": question},
                {"role": "assistant", "content": f"Database not found at {db_path}"}
            ]
        # Load the FAISS index
        vectorstore = FAISS.load_local(db_path, embeddings, allow_dangerous_deserialization=True)
        # Retrieve the most similar documents (chunks)
        relevant_docs = vectorstore.similarity_search(question, k=3)
        # Log the retrieved chunks
        for idx, doc in enumerate(relevant_docs):
            logging.info(f"--- Chunk {idx+1} ---")
            logging.info(doc.page_content)
            logging.info("---------------------")
        # Build the context from the retrieved documents
        context = "\n".join([doc.page_content for doc in relevant_docs])
        # Use the helper so an unknown prompt_type falls back to "tutor" instead of raising KeyError
        prompt = get_system_prompt(prompt_type).format(context=context)
        # Include the full conversation history (no message cap)
        conversation_history = []
        for msg in chat_history:
            conversation_history.append({
                "role": msg["role"],
                "content": msg["content"]
            })
        # Assemble the request messages with the full context
        messages = [
            {"role": "system", "content": prompt},
            *conversation_history,  # include the entire history
            {"role": "user", "content": question}
        ]
        if llm_type == LLMType.OPENAI:
            response = openai_client.chat.completions.create(
                model="gpt-4o-mini",
                messages=messages,
                temperature=0.7,
                max_tokens=2048  # higher limit to handle long conversations
            )
            answer = response.choices[0].message.content
        else:  # LLMType.LOCAL
            response = local_client.chat.completions.create(
                model="qwen2.5-coder-7b-instruct",
                messages=messages,
                temperature=0.7
            )
            answer = response.choices[0].message.content
        # Generate audio for both the question and the answer
        user_audio = generate_speech(question, is_user=True)
        assistant_audio = generate_speech(answer, is_user=False)
        return [
            {"role": "user", "content": question, "audio": user_audio},
            {"role": "assistant", "content": answer, "audio": assistant_audio}
        ]
    except Exception as e:
        logging.error(f"Error while generating the answer: {e}")
        return [
            {"role": "user", "content": question},
            {"role": "assistant", "content": f"An error occurred: {str(e)}"}
        ]
if __name__ == "__main__":
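    # Minimal smoke test (illustrative): assumes OPENAI_API_KEY is set and a
    # FAISS index named "faiss_index_test" exists under BASE_DB_PATH.
    for message in answer_question("What is retrieval-augmented generation?", "test"):
        print(f"{message['role']}: {message['content']}")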