from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
from langchain_groq import ChatGroq
import os
import json
from typing import List, Dict

class LLMProcessor:
    def __init__(self):
        """Initialize the embedding model and the Groq LLM."""
        self.api_key = os.getenv("GROQ_API_KEY")
        # Use FastEmbed instead of SentenceTransformer for embeddings
        self.embed_model = FastEmbedEmbeddings()
        self.llm = ChatGroq(
            model_name="mixtral-8x7b-32768",
            api_key=self.api_key
        )
    def format_context(self, chunks: List[Dict]) -> str:
        """Format retrieved chunks into a structured context string for the LLM."""
        context_parts = []
        for chunk in chunks:
            # 'headings' is stored as a JSON-encoded list; skip it if it is
            # missing or malformed instead of silently swallowing every error.
            try:
                headings = json.loads(chunk['headings'])
                if headings:
                    context_parts.append(f"Section: {' > '.join(headings)}")
            except (KeyError, TypeError, json.JSONDecodeError):
                pass
            if chunk.get('page'):
                context_parts.append(f"Page {chunk['page']}:")
            context_parts.append(chunk['text'])
            context_parts.append("-" * 40)
        return "\n".join(context_parts)
    def generate_answer(self, context: str, question: str) -> str:
        """Generate an answer to the question using the structured context."""
        prompt = f"""
You are an AI assistant tasked with answering user questions based on the given document excerpts. Your goal is to provide a clear, accurate, and helpful answer using only the provided context.

If the answer is not found in the context, explicitly state that you do not know instead of making up an answer. If the question is out of context, say that it is out of context, but still try to provide the best possible response from the available information.

---

### Context:
{context}

### User Question:
{question}

---

#### Instructions:
- Use only the given context to construct your answer.
- Reference relevant sections and page numbers where applicable.
- Be concise yet informative, focusing on clarity and usefulness.
- If uncertain, respond honestly (e.g., "The answer is not found in the provided context.").
- If out of context, state so clearly (e.g., "The question is out of context, but here’s what I found in the document...").

---

### Helpful Answer:
"""
        # ChatGroq.invoke returns an AIMessage; return its text content so the
        # method actually yields the str promised by the annotation.
        return self.llm.invoke(prompt).content
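

# Minimal usage sketch (hypothetical): assumes GROQ_API_KEY is set in the
# environment and that an upstream retriever produced chunk dicts with
# 'text', 'page', and JSON-encoded 'headings' keys. The sample chunk below
# is made up purely for illustration.
if __name__ == "__main__":
    processor = LLMProcessor()
    sample_chunks = [
        {
            "text": "The warranty covers manufacturing defects for 24 months.",
            "page": 4,
            "headings": '["3 Warranty", "3.1 Coverage"]',
        },
    ]
    context = processor.format_context(sample_chunks)
    answer = processor.generate_answer(context, "How long is the warranty?")
    print(answer)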