Spaces:

Nugh75
/

Edurag_beta

Sleeping

File size: 7,030 Bytes

080146c

# llm_handling.py
import logging
import os
from langchain_community.vectorstores import FAISS
import requests
from tenacity import retry, stop_after_attempt, wait_exponential
import json

from app.config import *
from app.configs.prompts import SYSTEM_PROMPTS
from app.utils.embedding_utils import get_embeddings
from app.utils.voice_utils import generate_speech

# Configure the root logger at INFO level when this module is imported.
# (logging.basicConfig does nothing if the root logger already has handlers.)
logging.basicConfig(level=logging.INFO)

# =====================================
# LLM-related functions
# =====================================

def get_llm_client(llm_type: LLMType):
    """Return a ``(client, model)`` pair for the selected LLM.

    The entry for ``llm_type`` is looked up in ``LLM_CONFIGS``; its
    ``"client"`` value is called with no arguments to build the client, and
    its ``"model"`` value is returned alongside it.

    Raises:
        ValueError: if ``LLM_CONFIGS`` has no (truthy) entry for ``llm_type``.
    """
    entry = LLM_CONFIGS.get(llm_type)
    if entry:
        client_factory = entry["client"]
        return client_factory(), entry["model"]
    raise ValueError(f"Modello {llm_type} non supportato")

def get_system_prompt(prompt_type="tutor"):
    """Return the system prompt registered under ``prompt_type``.

    When ``prompt_type`` is not a key of ``SYSTEM_PROMPTS`` the ``"tutor"``
    prompt is returned instead.
    """
    fallback_prompt = SYSTEM_PROMPTS["tutor"]
    return SYSTEM_PROMPTS.get(prompt_type, fallback_prompt)

def test_local_connection():
    """Check whether the local LLM server answers its health endpoint.

    Sends a GET request to the hard-coded health URL with a 5-second timeout.

    Returns:
        bool: True if the server replied with HTTP 200; False for any other
        status code or when the request itself failed (connection error,
        timeout, invalid response, ...).
    """
    try:
        response = requests.get("http://192.168.82.5:1234/v1/health", timeout=5)
    except requests.RequestException as exc:
        # Only request-level failures mean "server not reachable". A bare
        # ``except`` would also hide programming errors and swallow
        # KeyboardInterrupt/SystemExit.
        logging.warning("Server LLM locale non raggiungibile: %s", exc)
        return False
    return response.status_code == 200

def read_metadata(db_path):
    """Load the document metadata stored next to a vector index.

    Args:
        db_path: Directory expected to contain a ``metadata.json`` file.

    Returns:
        The parsed JSON content of ``metadata.json``, or an empty list when
        the file (or the directory leading to it) does not exist.

    Raises:
        json.JSONDecodeError: if the file exists but does not contain valid JSON.
    """
    metadata_file = os.path.join(db_path, "metadata.json")
    try:
        # Explicit UTF-8: the platform default encoding may differ and would
        # corrupt or reject non-ASCII titles/authors.
        with open(metadata_file, "r", encoding="utf-8") as f:
            return json.load(f)
    except (FileNotFoundError, NotADirectoryError):
        # Missing metadata is not an error: callers get an empty list.
        return []

def get_relevant_documents(vectorstore, question, min_similarity=0.7):
    """Retrieve the documents most relevant to ``question`` from ``vectorstore``.

    The question is first normalised with ``enhance_query``; up to 8
    candidates are then fetched and those whose relevance score is at least
    ``min_similarity`` are kept.

    Args:
        vectorstore: Vector store exposing
            ``similarity_search_with_relevance_scores`` (e.g. a LangChain FAISS store).
        question: The user's question.
        min_similarity: Minimum relevance score (higher = more similar) a
            document must reach to be returned.

    Returns:
        A list of at most 5 documents, best matches first; an empty list when
        nothing passes the threshold or when retrieval fails (the error is logged).
    """
    try:
        # Normalise the query before searching.
        enhanced_query = enhance_query(question)

        # similarity_search_with_score() returns raw distances for FAISS
        # (lower = closer), so thresholding them with ">=" kept the *least*
        # similar documents. Relevance scores are normalised so that higher
        # means more similar, which is what ``min_similarity`` expects.
        docs_and_scores = vectorstore.similarity_search_with_relevance_scores(
            enhanced_query,
            k=8,
        )

        filtered_docs = [
            doc for doc, score in docs_and_scores
            if score >= min_similarity
        ]

        # Debug logging of the outcome.
        logging.info(f"Query: {question}")
        logging.info(f"Documenti trovati: {len(filtered_docs)}")

        # Slicing an empty list already yields an empty list.
        return filtered_docs[:5]

    except Exception as e:
        logging.error(f"Errore nel recupero dei documenti: {e}")
        return []  # Best effort: callers always receive a list, never None.

def enhance_query(question):
    """Normalise a question before it is used as a search query.

    The text is lower-cased and split on whitespace; Italian articles are
    dropped and the remaining words are re-joined with single spaces.
    """
    articles = {'il', 'lo', 'la', 'i', 'gli', 'le', 'un', 'uno', 'una'}

    kept_tokens = []
    for token in question.lower().split():
        if token in articles:
            continue
        kept_tokens.append(token)

    return " ".join(kept_tokens)

def log_search_results(question, docs_and_scores):
    """Log the query and, for each ``(doc, score)`` pair, its score and a preview.

    Entries are numbered from 1; the preview is the first 100 characters of
    ``doc.page_content`` followed by an ellipsis. Everything is logged at INFO
    level on the root logger.
    """
    logging.info(f"Query: {question}")
    position = 0
    for document, similarity in docs_and_scores:
        position += 1
        preview = document.page_content[:100]
        logging.info(f"Doc {position} - Score: {similarity:.4f}")
        logging.info(f"Content: {preview}...")

@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
def answer_question(question, db_name, prompt_type="tutor", chat_history=None, llm_type=LLMType.OPENAI_GPT_4O_MINI):
    """Answer ``question`` using documents retrieved from a FAISS index.

    The index ``faiss_index_<db_name>`` under ``BASE_DB_PATH`` is loaded, the
    relevant documents are retrieved and injected into the system prompt, and
    the selected LLM is asked to answer. The sources that could be matched
    against ``metadata.json`` are appended to the answer.

    Args:
        question: The user's question.
        db_name: Name of the knowledge base; selects the index directory.
        prompt_type: Key of the system prompt to use; unknown keys fall back
            to the ``"tutor"`` prompt.
        chat_history: Optional list of previous messages, each a mapping with
            ``"role"`` and ``"content"`` keys. Any other keys are ignored.
        llm_type: Which configured LLM to query.

    Returns:
        A two-item list of chat messages ``[user_message, assistant_message]``,
        each a dict with ``"role"`` and ``"content"``. On failure the
        assistant message contains a warning text instead of an answer.
    """
    # NOTE: every ``Exception`` raised below is handled inside this function,
    # so the ``@retry`` decorator never sees one and does not re-run the call.
    if chat_history is None:
        chat_history = []

    try:
        embeddings = get_embeddings()  # shared embedding factory
        db_path = os.path.join(BASE_DB_PATH, f"faiss_index_{db_name}")

        # Index the metadata entries by file name for source lookups.
        metadata_dict = {m["filename"]: m for m in read_metadata(db_path)}

        # NOTE(security): allow_dangerous_deserialization=True opts in to
        # pickle-based loading of the stored index; only load indexes that
        # this application produced itself.
        vectorstore = FAISS.load_local(db_path, embeddings, allow_dangerous_deserialization=True)
        relevant_docs = get_relevant_documents(vectorstore, question)

        if not relevant_docs:
            return [
                {"role": "user", "content": question},
                {"role": "assistant", "content": "Mi dispiace, non ho trovato informazioni rilevanti per rispondere alla tua domanda. Prova a riformularla o a fare una domanda diversa."}
            ]

        total_parts = len(relevant_docs)

        # Build the source citations, numbered like the context chunks.
        sources = []
        for idx, doc in enumerate(relevant_docs, 1):
            meta = metadata_dict.get(doc.metadata.get("source", "Unknown"))
            if meta is not None:
                sources.append(f"📚 {meta['title']} (Autore: {meta['author']}) - Parte {idx} di {total_parts}")

        # Build the context passed to the model, one labelled part per chunk.
        context = "\n".join(
            f"[Parte {idx} di {total_parts}]\n{doc.page_content}"
            for idx, doc in enumerate(relevant_docs, 1)
        )

        # De-duplicate while keeping retrieval order (a set would shuffle the
        # citations), and skip the section entirely when nothing was matched.
        sources_text = ""
        if sources:
            sources_text = "\n\nFonti consultate:\n" + "\n".join(dict.fromkeys(sources))

        # Use the shared prompt selector so an unknown prompt_type falls back
        # to the tutor prompt instead of failing with a KeyError.
        prompt = get_system_prompt(prompt_type).format(context=context)
        prompt += "\nCita sempre le fonti utilizzate per la tua risposta includendo il titolo del documento e l'autore."

        # Full conversation: system prompt, prior turns, then the new question.
        messages = [
            {"role": "system", "content": prompt},
            *[{"role": m["role"], "content": m["content"]} for m in chat_history],
            {"role": "user", "content": question}
        ]

        # Query the LLM.
        client, model = get_llm_client(llm_type)
        response = client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=0.7,
            max_tokens=2048
        )
        # The message content may be None; treat that as an empty answer.
        answer = (response.choices[0].message.content or "") + sources_text

        # The success path previously fell through and returned None.
        return [
            {"role": "user", "content": question},
            {"role": "assistant", "content": answer}
        ]

    except Exception as e:
        logging.error(f"Errore durante la generazione della risposta: {e}")
        error_msg = "LLM locale non disponibile. Riprova più tardi o usa OpenAI." if "local" in str(llm_type) else str(e)
        return [
            {"role": "user", "content": question},
            {"role": "assistant", "content": f"⚠️ {error_msg}"}
        ]

class DocumentRetriever:
    """Retrieve relevant chunks from a FAISS index loaded from disk."""

    def __init__(self, db_path):
        """Load the FAISS index stored at ``db_path`` with the shared embeddings."""
        self.embeddings = get_embeddings()
        # NOTE(security): allow_dangerous_deserialization=True opts in to
        # pickle-based loading of the stored index; only load trusted indexes.
        self.vectorstore = FAISS.load_local(
            db_path,
            self.embeddings,
            allow_dangerous_deserialization=True
        )

    def get_relevant_chunks(self, question):
        """Return the chunks relevant to ``question``, best matches first.

        The question is normalised with ``enhance_query``, up to 8 candidates
        are fetched with their relevance scores, the candidates are logged,
        and the result is filtered by ``_filter_relevant_docs``.
        """
        enhanced_query = enhance_query(question)
        # Relevance scores (higher = more similar) are used rather than the
        # raw FAISS distances (lower = closer) so that a minimum-similarity
        # threshold is meaningful.
        docs_and_scores = self.vectorstore.similarity_search_with_relevance_scores(
            enhanced_query,
            k=8
        )

        log_search_results(question, docs_and_scores)
        return self._filter_relevant_docs(docs_and_scores)

    def _filter_relevant_docs(self, docs_and_scores, min_similarity=0.7, max_docs=5):
        """Keep the documents whose score reaches ``min_similarity``.

        This method was called by ``get_relevant_chunks`` but never defined,
        so every call raised ``AttributeError``.

        Args:
            docs_and_scores: Iterable of ``(document, score)`` pairs where a
                higher score means a better match.
            min_similarity: Minimum score a document needs to be kept.
            max_docs: Maximum number of documents to return.

        Returns:
            A list with at most ``max_docs`` documents, in input order.
        """
        kept = [doc for doc, score in docs_and_scores if score >= min_similarity]
        return kept[:max_docs]




# Script entry point: running this module directly currently does nothing.
if __name__ == "__main__":
    pass