Spaces:

NEXAS
/

docling_rag

Running

File size: 2,349 Bytes

cb560d6

from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
from langchain_groq import ChatGroq
import os
import json
from typing import List, Dict

class LLMProcessor:
    """Bundle an embedding model and a Groq chat model for document Q&A.

    The class formats retrieved document chunks into a plain-text context
    and asks the chat model to answer a question from that context.
    """

    def __init__(self, model_name: str = "mixtral-8x7b-32768"):
        """Initialize embedding model and Groq LLM.

        Args:
            model_name: Groq model identifier passed to ``ChatGroq``. The
                default matches the value that was previously hard-coded.
        """
        # May be None when the variable is unset; it is forwarded to ChatGroq
        # unchanged.
        self.api_key = os.getenv("GROQ_API_KEY")

        # Use FastEmbed instead of SentenceTransformer
        self.embed_model = FastEmbedEmbeddings()

        # NOTE(review): confirm the default model is still served by Groq;
        # hosted model identifiers get retired over time.
        self.llm = ChatGroq(
            model_name=model_name,
            api_key=self.api_key
        )

    @staticmethod
    def _parse_headings(raw) -> List[str]:
        """Return the heading path stored in *raw*, or ``[]`` if unusable.

        *raw* is normally a JSON-encoded list of strings. An already-decoded
        list/tuple is accepted as is, and a single JSON string is treated as
        a one-element path rather than being split into characters.
        """
        if isinstance(raw, (list, tuple)):
            parsed = raw
        else:
            try:
                parsed = json.loads(raw)
            except (TypeError, ValueError):
                # TypeError: raw is None / not str-like.
                # ValueError: malformed JSON (JSONDecodeError subclasses it).
                return []

        if isinstance(parsed, str):
            return [parsed] if parsed else []
        if isinstance(parsed, (list, tuple)):
            return [str(item) for item in parsed]
        return []

    def format_context(self, chunks: List[Dict]) -> str:
        """Format retrieved chunks into a structured context for the LLM.

        For every chunk the output contains, in order: an optional
        ``Section: a > b`` line built from the chunk's ``headings``, an
        optional ``Page N:`` line when ``page`` is truthy, the chunk's
        ``text``, and a 40-dash separator line.

        Args:
            chunks: Mappings with a required ``text`` key and optional
                ``headings`` (JSON-encoded list) and ``page`` keys.

        Returns:
            The formatted lines joined with newlines; ``""`` for no chunks.

        Raises:
            KeyError: If a chunk has no ``text`` key.
        """
        context_parts = []
        for chunk in chunks:
            # Headings are best-effort: missing or malformed values are
            # skipped instead of aborting the whole context.
            headings = self._parse_headings(chunk.get('headings'))
            if headings:
                context_parts.append(f"Section: {' > '.join(headings)}")

            page = chunk.get('page')
            if page:
                context_parts.append(f"Page {page}:")

            context_parts.append(chunk['text'])
            context_parts.append("-" * 40)

        return "\n".join(context_parts)

    def generate_answer(self, context: str, question: str) -> str:
        """Generate answer using structured context.

        Builds a single prompt embedding *context* and *question* and passes
        it to ``self.llm.invoke``.

        Args:
            context: Formatted document excerpts (see ``format_context``).
            question: The user's question.

        Returns:
            Whatever ``self.llm.invoke`` returns, unmodified.
            NOTE(review): LangChain chat models usually return a message
            object whose text is in ``.content`` rather than a plain ``str``;
            confirm how callers consume this before tightening the type.
        """
        prompt = f"""
You are an AI assistant tasked with answering user questions based on the given document excerpts. Your goal is to provide a clear, accurate, and helpful answer using only the provided context. 

If the answer is not found in the context, explicitly state that you do not know instead of making up an answer. If the question is out of context, say that it is out of context, but still try to provide the best possible response from the available information.

---
### Context:  
{context}

### User Question:  
{question}

---
#### Instructions:
- Use only the given context to construct your answer.
- Reference relevant sections and page numbers where applicable.
- Be concise yet informative, focusing on clarity and usefulness.
- If uncertain, respond honestly (e.g., "The answer is not found in the provided context.").
- If out of context, state so clearly (e.g., "The question is out of context, but here’s what I found in the document...").

---
### Helpful Answer:  
"""

        return self.llm.invoke(prompt)