import json
import logging
import os
from typing import Any, Dict, List

import requests
import spacy
from spacy.cli import download

class NLConverter:
    def __init__(self, api_key: str):
        self.api_key = api_key
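        # Groq's chat completions endpoint is OpenAI-compatible, so the
        # payload and response shapes used below mirror the OpenAI API.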
        self.api_url = "https://api.groq.com/openai/v1/chat/completions"
        self.headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json"
        }

        # Configure logging first so the download message below is emitted
        # (a bare logging.info() before basicConfig would default to WARNING).
        logging.basicConfig(level=logging.INFO)
        self.logger = logging.getLogger(__name__)

        # Attempt to load the spaCy model, installing it on first use if missing.
        try:
            self.nlp = spacy.load("en_core_web_sm")
        except OSError:
            self.logger.info("Model 'en_core_web_sm' not found. Downloading...")
            download("en_core_web_sm")
            self.nlp = spacy.load("en_core_web_sm")

    def extract_entities(self, text: str) -> List[Dict[str, Any]]:
        """Use spaCy to extract named entities from text."""
        doc = self.nlp(text)
        entities = [{"text": ent.text, "label": ent.label_} for ent in doc.ents]
        self.logger.info(f"Extracted entities: {entities}")
        
        return entities

    def convert_to_natural_language(self, query_result: Dict[str, Any], original_query: str) -> Dict[str, Any]:
        """Convert query results to natural language using LLaMA-3 and spaCy for NER."""
        if not query_result["success"]:
            self.logger.error("Query execution failed; no results to process.")
            return {"success": False, "error": "No results to process"}

        # Format the query result data
        formatted_data = [dict(zip(query_result["columns"], row)) for row in query_result["results"]]
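        # e.g. columns ["order_id", "total_price"] with row [1001, 150]
        # becomes {"order_id": 1001, "total_price": 150}.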
        
        # Convert formatted data to a string for entity extraction
        formatted_text = "\n".join([str(row) for row in formatted_data])

        # Extract named entities from the query result
        entities = self.extract_entities(formatted_text)

        # Prepare system and user prompts
        system_prompt = (
            "You are a data interpreter that uses named entities to create a clear, natural language explanation. "
            "Your job is to make sense of the given entities, summarize key insights, and answer the original question."
        )

        # Include the original query and the extracted entities in the user
        # prompt; built by concatenation so no stray indentation is sent.
        user_prompt = (
            f"Original question: {original_query}\n"
            f"Extracted Entities:\n{json.dumps(entities, indent=2)}\n"
            f"Data Summary:\n{formatted_text}\n"
            "Based on this information, generate a natural language "
            "explanation of the query results."
        )

        try:
            # Prepare the payload to send to the API
            payload = {
                "model": "llama3-8b-8192",  # Adjust model name if necessary
                "messages": [
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ],
                "max_tokens": 500,
                "temperature": 0.3
            }

            # Send the request to the API
            response = requests.post(self.api_url, headers=self.headers, json=payload, timeout=30)
            response.raise_for_status()
            
            result = response.json()
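            # OpenAI-compatible schema: the generated text lives at
            # choices[0].message.content on success.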
            if 'choices' in result and result['choices']:
                explanation = result['choices'][0]['message']['content'].strip()
                self.logger.info(f"Generated natural language explanation: {explanation}")
                return {"success": True, "explanation": explanation}
            
            return {"success": False, "error": "Failed to generate explanation"}

        except requests.exceptions.RequestException as e:
            self.logger.error(f"API request error: {e}")
            return {"success": False, "error": f"API request failed: {e}"}
        except (KeyError, ValueError) as e:
            # response.json() raises a ValueError subclass on non-JSON bodies;
            # indexing into an unexpected payload raises KeyError.
            self.logger.error(f"Malformed API response: {e}")
            return {"success": False, "error": f"Malformed API response: {e}"}

# Example Usage
if __name__ == "__main__":
    api_key = "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"
    query_result = {
        "success": True,
        "columns": ["order_id", "total_price", "order_date"],
        "results": [
            [1001, 150, "2024-10-01"],
            [1002, 200, "2024-10-02"]
        ]
    }
    original_query = "Show me the orders with total price greater than 100"
    
    nl_converter = NLConverter(api_key)
    result = nl_converter.convert_to_natural_language(query_result, original_query)
    if result["success"]:
        print(result["explanation"])
    else:
        print(f"Error: {result['error']}")