Delete LLM
- LLM/__init__.py +0 -13
- LLM/image_answerer.py +0 -136
- LLM/llm_handler.py +0 -216
- LLM/one_shotter.py +0 -218
- LLM/tabular_answer.py +0 -128
LLM/__init__.py
DELETED
@@ -1,13 +0,0 @@
# LLM Handler Package

from .llm_handler import llm_handler
from .tabular_answer import get_answer_for_tabluar
from .image_answerer import get_answer_for_image
from .one_shotter import get_oneshot_answer

__all__ = [
    'llm_handler',
    'get_answer_for_tabluar',
    'get_answer_for_image',
    'get_oneshot_answer'
]
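Before this deletion, callers imported the package-level API that this __init__.py re-exported. A one-line sketch, assuming the repository root was on the import path:

# Sketch: package-root imports that this __init__ made possible.
from LLM import llm_handler, get_oneshot_answer, get_answer_for_image, get_answer_for_tabluar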
LLM/image_answerer.py
DELETED
@@ -1,136 +0,0 @@
import os
import requests
import google.generativeai as genai
from PIL import Image
from io import BytesIO
from typing import List, Union
import logging

from dotenv import load_dotenv

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

load_dotenv()

# Configure Gemini API for image processing
genai.configure(api_key=os.getenv("GEMINI_API_KEY_IMAGE"))

def load_image(image_source: str) -> Image.Image:
    """Load image from a URL or local path."""
    try:
        if image_source.startswith("http://") or image_source.startswith("https://"):
            logger.info(f"Loading image from URL: {image_source}")
            response = requests.get(image_source, timeout=30)
            response.raise_for_status()
            return Image.open(BytesIO(response.content)).convert("RGB")
        elif os.path.isfile(image_source):
            logger.info(f"Loading image from file: {image_source}")
            return Image.open(image_source).convert("RGB")
        else:
            raise ValueError("Invalid image source: must be a valid URL or file path")
    except Exception as e:
        logger.error(f"Failed to load image from {image_source}: {e}")
        raise RuntimeError(f"Failed to load image: {e}")

def get_answer_for_image(image_source: str, questions: List[str], retries: int = 3) -> List[str]:
    """Ask questions about an image using Gemini Vision model."""
    try:
        logger.info(f"Processing image with {len(questions)} questions")
        image = load_image(image_source)

        prompt = """
        Answer the following questions about the image. Give the answers in the same order as the questions.
        Answers should be descriptive and detailed. Give one answer per line with numbering as "1. 2. 3. ..".

        Example answer format:
        1. Answer 1, Explanation
        2. Answer 2, Explanation
        3. Answer 3, Explanation

        Questions:
        """
        prompt += "\n".join(f"{i+1}. {q}" for i, q in enumerate(questions))

        model = genai.GenerativeModel("gemini-1.5-flash")

        for attempt in range(retries):
            try:
                logger.info(f"Attempt {attempt + 1} of {retries} to get response from Gemini")
                response = model.generate_content(
                    [prompt, image],
                    generation_config=genai.types.GenerationConfig(
                        temperature=0.4,
                        max_output_tokens=2048
                    )
                )
                raw_text = response.text.strip()
                logger.info(f"Received response from Gemini: {len(raw_text)} characters")

                answers = extract_ordered_answers(raw_text, len(questions))
                if len(answers) == len(questions):
                    logger.info(f"Successfully extracted {len(answers)} answers")
                    return answers
                else:
                    logger.warning(f"Expected {len(questions)} answers, got {len(answers)}")

            except Exception as e:
                logger.error(f"Attempt {attempt + 1} failed: {e}")
                if attempt == retries - 1:
                    raise RuntimeError(f"Failed after {retries} attempts: {e}")

        raise RuntimeError("Failed to get valid response from Gemini.")

    except Exception as e:
        logger.error(f"Error in get_answer_for_image: {e}")
        raise

def extract_ordered_answers(raw_text: str, expected_count: int) -> List[str]:
    """Parse the raw Gemini output into a clean list of answers."""
    import re

    logger.debug(f"Extracting {expected_count} answers from raw text")
    lines = raw_text.splitlines()
    answers = []

    for line in lines:
        # Match numbered lines: "1. Answer", "1) Answer", "1 - Answer", etc.
        match = re.match(r"^\s*(\d+)[\).\s-]*\s*(.+)", line)
        if match:
            answer_text = match.group(2).strip()
            if answer_text:  # Only add non-empty answers
                answers.append(answer_text)

    # Fallback: if numbering failed, use plain lines
    if len(answers) < expected_count:
        logger.warning("Numbered extraction failed, using fallback method")
        answers = [line.strip() for line in lines if line.strip()]

    # Return exactly the expected number of answers
    result = answers[:expected_count]

    # If we still don't have enough answers, pad with error messages
    while len(result) < expected_count:
        result.append("Unable to extract answer from image")

    logger.info(f"Extracted {len(result)} answers")
    return result

def process_image_query(image_path: str, query: str) -> str:
    """Process a single query about an image."""
    try:
        questions = [query]
        answers = get_answer_for_image(image_path, questions)
        return answers[0] if answers else "Unable to process image query"
    except Exception as e:
        logger.error(f"Error processing image query: {e}")
        return f"Error processing image: {str(e)}"

def process_multiple_image_queries(image_path: str, queries: List[str]) -> List[str]:
    """Process multiple queries about an image."""
    try:
        return get_answer_for_image(image_path, queries)
    except Exception as e:
        logger.error(f"Error processing multiple image queries: {e}")
        return [f"Error processing image: {str(e)}"] * len(queries)
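For context on what this file provided, a minimal usage sketch of the image answerer above. The image URL, local file name, and questions are placeholders, and a GEMINI_API_KEY_IMAGE entry in .env is assumed.

# Sketch: batch and single-question calls into the deleted image answerer.
from LLM.image_answerer import get_answer_for_image, process_image_query

questions = [
    "What objects are visible in the image?",
    "What is the dominant color?",
]

# One Gemini call answers all questions, returned in the same order.
answers = get_answer_for_image("https://example.com/sample.jpg", questions)
for q, a in zip(questions, answers):
    print(f"Q: {q}\nA: {a}")

# Convenience wrapper for a single question about a local file.
print(process_image_query("photo.png", "Is there any visible text?"))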
LLM/llm_handler.py
DELETED
@@ -1,216 +0,0 @@
"""
Multi-LLM Handler with failover support
Uses Groq, Gemini, and OpenAI with automatic failover for reliability
"""

import asyncio
import time
from typing import Optional, Dict, Any, List
import os
import requests
import google.generativeai as genai
import openai
from dotenv import load_dotenv
from config.config import get_provider_configs

load_dotenv()

class MultiLLMHandler:
    """Multi-LLM handler with automatic failover across providers."""

    def __init__(self):
        """Initialize the multi-LLM handler with all available providers."""
        self.providers = get_provider_configs()
        self.current_provider = None
        self.current_config = None

        # Initialize the first available provider (prefer Gemini/OpenAI for general RAG)
        self._initialize_provider()

        print(f"✅ Initialized Multi-LLM Handler with {self.provider.upper()}: {self.model_name}")

    def _initialize_provider(self):
        """Initialize the first available provider."""
        # Prefer Gemini first for general text tasks
        if self.providers["gemini"]:
            self.current_provider = "gemini"
            self.current_config = self.providers["gemini"][0]
            genai.configure(api_key=self.current_config["api_key"])
        # Then OpenAI
        elif self.providers["openai"]:
            self.current_provider = "openai"
            self.current_config = self.providers["openai"][0]
            openai.api_key = self.current_config["api_key"]
        # Finally Groq
        elif self.providers["groq"]:
            self.current_provider = "groq"
            self.current_config = self.providers["groq"][0]
        else:
            raise ValueError("No LLM providers available with valid API keys")

    @property
    def provider(self):
        """Get current provider name."""
        return self.current_provider

    @property
    def model_name(self):
        """Get current model name."""
        return self.current_config["model"] if self.current_config else "unknown"

    async def _call_groq(self, prompt: str, temperature: float, max_tokens: int) -> str:
        """Call Groq API."""
        headers = {
            "Authorization": f"Bearer {self.current_config['api_key']}",
            "Content-Type": "application/json"
        }

        data = {
            "model": self.current_config["model"],
            "messages": [{"role": "user", "content": prompt}],
            "temperature": temperature,
            "max_tokens": max_tokens
        }

        response = requests.post(
            "https://api.groq.com/openai/v1/chat/completions",
            headers=headers,
            json=data,
            timeout=30
        )
        response.raise_for_status()

        result = response.json()
        return result["choices"][0]["message"]["content"].strip()

    async def _call_gemini(self, prompt: str, temperature: float, max_tokens: int) -> str:
        """Call Gemini API."""
        model = genai.GenerativeModel(self.current_config["model"])

        generation_config = genai.types.GenerationConfig(
            temperature=temperature,
            max_output_tokens=max_tokens
        )

        response = await asyncio.to_thread(
            model.generate_content,
            prompt,
            generation_config=generation_config
        )
        return response.text.strip()

    async def _call_openai(self, prompt: str, temperature: float, max_tokens: int) -> str:
        """Call OpenAI API."""
        response = await asyncio.to_thread(
            openai.ChatCompletion.create,
            model=self.current_config["model"],
            messages=[{"role": "user", "content": prompt}],
            temperature=temperature,
            max_tokens=max_tokens
        )
        return response.choices[0].message.content.strip()

    async def _try_with_failover(self, prompt: str, temperature: float, max_tokens: int) -> str:
        """Try to generate text with automatic failover."""
        # Get all available providers in order
        provider_order = []
        # Prefer Gemini -> OpenAI -> Groq for general text
        if self.providers["gemini"]:
            provider_order.extend([("gemini", config) for config in self.providers["gemini"]])
        if self.providers["openai"]:
            provider_order.extend([("openai", config) for config in self.providers["openai"]])
        if self.providers["groq"]:
            provider_order.extend([("groq", config) for config in self.providers["groq"]])

        last_error = None

        for provider_name, config in provider_order:
            try:
                # Set current provider
                old_provider = self.current_provider
                old_config = self.current_config

                self.current_provider = provider_name
                self.current_config = config

                # Configure API if needed
                if provider_name == "gemini":
                    genai.configure(api_key=config["api_key"])
                elif provider_name == "openai":
                    openai.api_key = config["api_key"]

                # Try the API call
                if provider_name == "groq":
                    return await self._call_groq(prompt, temperature, max_tokens)
                elif provider_name == "gemini":
                    return await self._call_gemini(prompt, temperature, max_tokens)
                elif provider_name == "openai":
                    return await self._call_openai(prompt, temperature, max_tokens)

            except Exception as e:
                print(f"⚠️ {provider_name.upper()} ({config['name']}) failed: {str(e)}")
                last_error = e

                # Restore previous provider
                self.current_provider = old_provider
                self.current_config = old_config
                continue

        # If all providers failed
        raise RuntimeError(f"All LLM providers failed. Last error: {last_error}")

    async def generate_text(self,
                            prompt: Optional[str] = None,
                            system_prompt: Optional[str] = None,
                            user_prompt: Optional[str] = None,
                            temperature: Optional[float] = 0.4,
                            max_tokens: Optional[int] = 1200) -> str:
        """Generate text using multi-LLM with failover."""
        # Handle both single prompt and system/user prompt formats
        if prompt:
            final_prompt = prompt
        elif system_prompt and user_prompt:
            final_prompt = f"{system_prompt}\n\n{user_prompt}"
        elif user_prompt:
            final_prompt = user_prompt
        else:
            raise ValueError("Must provide either 'prompt' or 'user_prompt'")

        return await self._try_with_failover(
            final_prompt,
            temperature or 0.4,
            max_tokens or 1200
        )

    async def generate_simple(self,
                              prompt: str,
                              temperature: Optional[float] = 0.4,
                              max_tokens: Optional[int] = 1200) -> str:
        """Simple text generation (alias for generate_text for compatibility)."""
        return await self.generate_text(prompt=prompt, temperature=temperature, max_tokens=max_tokens)

    def get_provider_info(self) -> Dict[str, Any]:
        """Get information about the current provider."""
        return {
            "provider": self.current_provider,
            "model": self.model_name,
            "config_name": self.current_config["name"] if self.current_config else "none",
            "available_providers": {
                "groq": len(self.providers["groq"]),
                "gemini": len(self.providers["gemini"]),
                "openai": len(self.providers["openai"])
            }
        }

    async def test_connection(self) -> bool:
        """Test the connection to the current LLM provider."""
        try:
            test_prompt = "Say 'Hello' if you can read this."
            response = await self.generate_simple(test_prompt, temperature=0.1, max_tokens=10)
            return "hello" in response.lower()
        except Exception as e:
            print(f"❌ Connection test failed: {str(e)}")
            return False

# Create a global instance
llm_handler = MultiLLMHandler()
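A minimal sketch of how the global handler above was typically driven; it assumes at least one provider key is present in .env so MultiLLMHandler() can initialize, and the prompts are placeholders.

# Sketch: async text generation with automatic provider failover.
import asyncio
from LLM.llm_handler import llm_handler

async def main():
    # Single-prompt form.
    summary = await llm_handler.generate_text(prompt="Summarize retrieval-augmented generation in one sentence.")
    print(summary)

    # System/user form; the handler concatenates the two before dispatch.
    answer = await llm_handler.generate_text(
        system_prompt="You are a terse assistant.",
        user_prompt="List three uses of text embeddings.",
        temperature=0.2,
        max_tokens=200,
    )
    print(answer)

    # Inspect which provider/model actually served the requests.
    print(llm_handler.get_provider_info())

asyncio.run(main())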
LLM/one_shotter.py
DELETED
@@ -1,218 +0,0 @@
import re
import asyncio
from typing import List, Dict
from urllib.parse import urlparse
import httpx
from bs4 import BeautifulSoup
import os
from dotenv import load_dotenv

load_dotenv()

# Import our multi-LLM handler
from LLM.llm_handler import llm_handler

# URL extraction pattern (same as ShastraDocs)
URL_PATTERN = re.compile(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')

def extract_urls_from_text(text: str) -> List[str]:
    urls = URL_PATTERN.findall(text or "")
    seen = set()
    clean_urls = []
    for url in urls:
        clean_url = url.rstrip('.,;:!?)')
        if clean_url and clean_url not in seen and validate_url(clean_url):
            seen.add(clean_url)
            clean_urls.append(clean_url)
    return clean_urls

def validate_url(url: str) -> bool:
    try:
        result = urlparse(url)
        return bool(result.scheme and result.netloc)
    except Exception:
        return False

async def scrape_url(url: str, max_chars: int = 4000) -> Dict[str, str]:
    """Async URL scraping using httpx + BeautifulSoup (FastAPI-friendly)."""
    try:
        timeout = httpx.Timeout(20.0)
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        async with httpx.AsyncClient(timeout=timeout, headers=headers, follow_redirects=True) as client:
            resp = await client.get(url)
            resp.raise_for_status()
            soup = BeautifulSoup(resp.content, 'html.parser')
            for tag in soup(['script', 'style', 'nav', 'footer', 'header', 'aside']):
                tag.decompose()
            text_content = soup.get_text(separator=' ', strip=True)
            cleaned = ' '.join(text_content.split())
            if len(cleaned) > max_chars:
                cleaned = cleaned[:max_chars] + "..."
            return {
                'url': url,
                'content': cleaned,
                'status': 'success',
                'length': len(cleaned),
                'title': soup.title.string if soup.title else 'No title'
            }
    except httpx.TimeoutException:
        return {'url': url, 'content': 'Timeout error', 'status': 'timeout', 'length': 0, 'title': 'Timeout'}
    except Exception as e:
        return {'url': url, 'content': f'Error: {str(e)[:100]}', 'status': 'error', 'length': 0, 'title': 'Error'}

async def scrape_urls(urls: List[str], max_chars: int = 4000) -> List[Dict[str, str]]:
    if not urls:
        return []
    sem = asyncio.Semaphore(5)
    async def _scrape(u):
        async with sem:
            return await scrape_url(u, max_chars)
    results = await asyncio.gather(*[_scrape(u) for u in urls], return_exceptions=True)
    final = []
    for i, r in enumerate(results):
        if isinstance(r, Exception):
            final.append({'url': urls[i], 'content': f'Exception: {str(r)[:100]}', 'status': 'exception', 'length': 0, 'title': 'Exception'})
        else:
            final.append(r)
    return final

def build_additional_content(scrapes: List[Dict[str, str]]) -> str:
    parts = []
    for r in scrapes:
        if r.get('status') == 'success' and r.get('length', 0) > 50:
            parts.append("\n" + "="*50)
            parts.append(f"SOURCE: Additional Source")
            parts.append(f"URL: {r.get('url','')}")
            parts.append(f"TITLE: {r.get('title','No title')}")
            parts.append("-"*30 + " CONTENT " + "-"*30)
            parts.append(r.get('content',''))
            parts.append("="*50)
    return "\n".join(parts)

def parse_numbered_answers(text: str, expected_count: int) -> List[str]:
    """Parse numbered answers, with sane fallbacks."""
    pattern = re.compile(r'^\s*(\d+)[\).\-]\s*(.+)$', re.MULTILINE)
    matches = pattern.findall(text or "")
    result: Dict[int, str] = {}
    for num_str, answer in matches:
        try:
            num = int(num_str)
            if 1 <= num <= expected_count:
                clean_answer = re.sub(r'\s+', ' ', answer).strip()
                if clean_answer:
                    result[num] = clean_answer
        except Exception:
            continue
    answers: List[str] = []
    for i in range(1, expected_count + 1):
        answers.append(result.get(i, f"Unable to find answer for question {i}"))
    return answers

def parse_answers_from_json(raw: str, expected_count: int) -> List[str]:
    import json, re
    # Try direct JSON
    try:
        obj = json.loads(raw)
        if isinstance(obj, dict) and isinstance(obj.get('answers'), list):
            out = [str(x).strip() for x in obj['answers']][:expected_count]
            while len(out) < expected_count:
                out.append(f"Unable to find answer for question {len(out)+1}")
            return out
    except Exception:
        pass
    # Try to extract JSON fragment
    m = re.search(r'\{[^\{\}]*"answers"[^\{\}]*\}', raw or "", re.DOTALL)
    if m:
        try:
            obj = json.loads(m.group(0))
            if isinstance(obj, dict) and isinstance(obj.get('answers'), list):
                out = [str(x).strip() for x in obj['answers']][:expected_count]
                while len(out) < expected_count:
                    out.append(f"Unable to find answer for question {len(out)+1}")
                return out
        except Exception:
            pass
    # Fallback to numbered parsing
    return parse_numbered_answers(raw or "", expected_count)

async def get_oneshot_answer(content: str, questions: List[str]) -> List[str]:
    """
    Enhanced oneshot QA flow with ShastraDocs-style URL extraction and scraping.
    - Extract URLs from content and questions
    - Scrape relevant pages
    - Merge additional content and feed to LLM
    - Return per-question answers
    """
    if not questions:
        return []

    try:
        # Build numbered questions
        numbered_questions = "\n".join([f"{i+1}. {q}" for i, q in enumerate(questions)])

        # Find URLs from content and questions
        combined = (content or "") + "\n" + "\n".join(questions or [])
        found_urls = extract_urls_from_text(combined)

        # Special case: content starts with URL marker
        if content.startswith("URL for Context:"):
            only_url = content.replace("URL for Context:", "").strip()
            if validate_url(only_url):
                if only_url not in found_urls:
                    found_urls.insert(0, only_url)

        # Scrape URLs if any
        additional_content = ""
        if found_urls:
            print(f"🚀 Scraping {len(found_urls)} URL(s) for additional context...")
            scrape_results = await scrape_urls(found_urls, max_chars=4000)
            additional_content = build_additional_content(scrape_results)
            print(f"📄 Additional content length: {len(additional_content)}")

        # Merge final context
        if additional_content:
            final_context = (content or "") + "\n\nADDITIONAL INFORMATION FROM SCRAPED SOURCES:\n" + additional_content
        else:
            final_context = content or ""

        print(f"📊 Final context length: {len(final_context)}")

        # Prompts (ask for JSON answers to improve parsing)
        system_prompt = (
            "You are an expert assistant. Read ALL provided context (including any 'ADDITIONAL INFORMATION FROM\n"
            "SCRAPED SOURCES') and answer the questions comprehensively. If info is missing, say so."
        )

        user_prompt = f"""FULL CONTEXT:
{final_context[:8000]}{"..." if len(final_context) > 8000 else ""}

QUESTIONS:
{numbered_questions}

Respond in this EXACT JSON format:
{{
  "answers": [
    "<Answer to question 1>",
    "<Answer to question 2>",
    "<Answer to question 3>"
  ]
}}"""

        print(f"🤖 Using {llm_handler.provider.upper()} model: {llm_handler.model_name}")
        raw = await llm_handler.generate_text(
            system_prompt=system_prompt,
            user_prompt=user_prompt,
            temperature=0.4,
            max_tokens=1800
        )

        print(f"🔄 LLM response length: {len(raw) if raw else 0}")
        answers = parse_answers_from_json(raw, len(questions))
        print(f"✅ Parsed {len(answers)} answers")
        return answers

    except Exception as e:
        print(f"❌ Error in oneshot answer generation: {str(e)}")
        return [f"Error processing question: {str(e)}" for _ in questions]
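A minimal sketch of the one-shot flow above; the context URL and questions are invented placeholders. Any URLs found in the context or questions are scraped and merged into the prompt before the LLM call.

# Sketch: one-shot QA over pasted context plus scraped URLs.
import asyncio
from LLM.one_shotter import get_oneshot_answer

context = "URL for Context: https://example.com/policy.html"
questions = [
    "What is the waiting period for pre-existing conditions?",
    "Is dental treatment covered?",
]

answers = asyncio.run(get_oneshot_answer(context, questions))
for a in answers:
    print(a)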
LLM/tabular_answer.py
DELETED
@@ -1,128 +0,0 @@
import os
import re
import math
from typing import List
from langchain_core.messages import SystemMessage, HumanMessage
from langchain_groq import ChatGroq
from dotenv import load_dotenv

load_dotenv()
TABULAR_VERBOSE = os.environ.get("TABULAR_VERBOSE", "0") in ("1", "true", "True", "yes", "YES")

# Initialize Groq LLM for tabular data using specialized API key
TABULAR_MODEL = os.environ.get("GROQ_TABULAR_MODEL", os.environ.get("GROQ_MODEL_TABULAR", "qwen/qwen3-32b"))
GROQ_LLM = ChatGroq(
    groq_api_key=os.environ.get("GROQ_API_KEY_TABULAR", os.environ.get("GROQ_API_KEY")),
    model_name=TABULAR_MODEL
)

def get_answer_for_tabluar(
    data: str,
    questions: List[str],
    batch_size: int = 10,
    verbose: bool = False
) -> List[str]:
    """
    Query Groq LLM for tabular data analysis, handling batches and preserving order of answers.

    Args:
        data (str): Tabular context in markdown or plain-text.
        questions (List[str]): List of questions to ask.
        batch_size (int): Max number of questions per batch.
        verbose (bool): If True, print raw LLM responses.

    Returns:
        List[str]: Ordered list of answers corresponding to input questions.
    """

    def parse_numbered_answers(text: str, expected: int) -> List[str]:
        """
        Parse answers from a numbered list format ('1.', '2.', etc.)
        Use non-greedy capture with lookahead to stop at the next number or end.
        """
        pattern = re.compile(r"^\s*(\d{1,2})[\.)\-]\s*(.*?)(?=\n\s*\d{1,2}[\.)\-]\s*|$)", re.MULTILINE | re.DOTALL)
        matches = pattern.findall(text)

        result = {}
        for num_str, answer in matches:
            try:
                num = int(num_str)
            except ValueError:
                continue
            if 1 <= num <= expected:
                clean_answer = re.sub(r'\s+', ' ', answer).strip()
                result[num] = clean_answer

        # If no structured matches, fall back to line-based heuristic
        if not result:
            lines = [ln.strip() for ln in text.strip().splitlines() if ln.strip()]
            for i in range(min(expected, len(lines))):
                result[i + 1] = lines[i]

        # Build fixed-length list
        answers = []
        for i in range(1, expected + 1):
            answers.append(result.get(i, f"Unable to answer question {i}"))

        return answers

    if not questions:
        return []

    # Process questions in batches
    all_answers = []
    total_batches = math.ceil(len(questions) / batch_size)

    for batch_idx in range(total_batches):
        start = batch_idx * batch_size
        end = min(start + batch_size, len(questions))
        batch_questions = questions[start:end]

        print(f"Processing batch {batch_idx + 1}/{total_batches} ({len(batch_questions)} questions)")

        # Create numbered question list (joined with real newlines)
        numbered_questions = "\n".join([f"{i+1}. {q}" for i, q in enumerate(batch_questions)])

        # Create prompt
        system_prompt = """You are an expert data analyst. Analyze the provided tabular data and answer the questions accurately.

Instructions:
- Answer each question based ONLY on the data provided
- If data is insufficient, state "Information not available in the provided data"
- Provide clear, concise answers
- Format your response as a numbered list (1., 2., 3., etc.)
- Do not add explanations unless specifically asked"""

        user_prompt = f"""Data:
{data}

Questions:
{numbered_questions}

Please provide numbered answers (1., 2., 3., etc.) for each question."""

        try:
            # Create messages
            messages = [
                SystemMessage(content=system_prompt),
                HumanMessage(content=user_prompt)
            ]

            # Get response from LLM
            response = GROQ_LLM.invoke(messages)
            raw_response = response.content or ""

            if verbose or TABULAR_VERBOSE:
                print(f"🟢 Raw LLM Response (batch {batch_idx + 1}):\n{raw_response[:1200]}\n--- END RAW ---")

            # Parse the response
            batch_answers = parse_numbered_answers(raw_response, len(batch_questions))
            all_answers.extend(batch_answers)

        except Exception as e:
            print(f"Error processing batch {batch_idx + 1}: {str(e)}")
            # Add error answers for this batch
            error_answers = [f"Error processing question: {str(e)}" for _ in batch_questions]
            all_answers.extend(error_answers)

    return all_answers
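A minimal sketch of the tabular helper above; the table and questions are made up, and a GROQ_API_KEY_TABULAR (or GROQ_API_KEY) entry in .env is assumed.

# Sketch: batched question answering over a small markdown table.
from LLM.tabular_answer import get_answer_for_tabluar

table = """| city   | population |
|--------|------------|
| Pune   | 3,100,000  |
| Nagpur | 2,400,000  |"""

questions = [
    "Which city has the larger population?",
    "What is the population of Nagpur?",
]

answers = get_answer_for_tabluar(table, questions, batch_size=10, verbose=True)
for a in answers:
    print(a)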