import json
import logging
import os
from typing import Any, Dict, List

import requests
import spacy
from spacy.cli import download


class NLConverter:
    def __init__(self, api_key: str):
        self.api_key = api_key
        self.api_url = "https://api.groq.com/openai/v1/chat/completions"
        self.headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        }

        # Configure logging before anything below tries to log.
        logging.basicConfig(level=logging.INFO)
        self.logger = logging.getLogger(__name__)

        # Attempt to load the spaCy model, downloading it if missing.
        try:
            self.nlp = spacy.load("en_core_web_sm")
        except OSError:
            self.logger.info("Model 'en_core_web_sm' not found. Downloading...")
            download("en_core_web_sm")
            self.nlp = spacy.load("en_core_web_sm")

    def extract_entities(self, text: str) -> List[Dict[str, Any]]:
        """Use spaCy to extract named entities from text."""
        doc = self.nlp(text)
        entities = [{"text": ent.text, "label": ent.label_} for ent in doc.ents]
        self.logger.info(f"Extracted entities: {entities}")

        # Print each extracted entity for quick inspection.
        for entity in entities:
            print(f"Entity: {entity['text']} - Label: {entity['label']}")

        return entities

    def convert_to_natural_language(self, query_result: Dict[str, Any], original_query: str) -> Dict[str, Any]:
        """Convert query results to natural language using LLaMA-3, with spaCy for NER."""
        if not query_result["success"]:
            self.logger.error("Query execution failed; no results to process.")
            return {"success": False, "error": "No results to process"}

        # Pair each result row with its column names.
        formatted_data = [dict(zip(query_result["columns"], row)) for row in query_result["results"]]

        # Flatten the rows into a single string for entity extraction.
        formatted_text = "\n".join(str(row) for row in formatted_data)

        # Extract named entities from the query result.
        entities = self.extract_entities(formatted_text)

        system_prompt = (
            "You are a data interpreter that uses named entities to create a clear, natural language explanation. "
            "Your job is to make sense of the given entities, summarize key insights, and answer the original question."
        )

        # Include the original query and the extracted entities in the user prompt.
        user_prompt = f"""
Original question: {original_query}

Extracted Entities:
{json.dumps(entities, indent=2)}

Data Summary:
{formatted_text}

Based on this information, generate a natural language explanation of the query results.
"""

        try:
            # Prepare the payload to send to the API.
            payload = {
                "model": "llama3-8b-8192",  # Adjust the model name if necessary.
                "messages": [
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt},
                ],
                "max_tokens": 500,
                "temperature": 0.3,
            }

            # Send the request to the API.
            response = requests.post(self.api_url, headers=self.headers, json=payload, timeout=30)
            response.raise_for_status()

            result = response.json()
            if "choices" in result and result["choices"]:
                explanation = result["choices"][0]["message"]["content"].strip()
                self.logger.info(f"Generated natural language explanation: {explanation}")
                return {"success": True, "explanation": explanation}
            return {"success": False, "error": "Failed to generate explanation"}
        except requests.exceptions.RequestException as e:
            self.logger.error(f"API request error: {e}")
            return {"success": False, "error": f"API request failed: {e}"}


# Example Usage
if __name__ == "__main__":
    # Read the API key from the environment rather than hardcoding a secret.
    api_key = os.environ.get("GROQ_API_KEY", "")

    query_result = {
        "success": True,
        "columns": ["order_id", "total_price", "order_date"],
        "results": [
            [1001, 150, "2024-10-01"],
            [1002, 200, "2024-10-02"],
        ],
    }
    original_query = "Show me the orders with total price greater than 100"

    nl_converter = NLConverter(api_key)
    result = nl_converter.convert_to_natural_language(query_result, original_query)
    if result["success"]:
        print(result["explanation"])
    else:
        print(f"Error: {result['error']}")