import json
import logging
import os
from typing import Any, Dict, List

import requests
import spacy
from spacy.cli import download

class NLConverter:
    def __init__(self, api_key: str):
        self.api_key = api_key
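        # Groq's chat completions endpoint is OpenAI-compatible, so the
        # payload and response shapes used below mirror the OpenAI API.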
        self.api_url = "https://api.groq.com/openai/v1/chat/completions"
        self.headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json"
        }

        # Configure logging first so the download message below is emitted
        # (a bare logging.info() before basicConfig would default to WARNING).
        logging.basicConfig(level=logging.INFO)
        self.logger = logging.getLogger(__name__)

        # Attempt to load the spaCy model, installing it on first use if missing.
        try:
            self.nlp = spacy.load("en_core_web_sm")
        except OSError:
            self.logger.info("Model 'en_core_web_sm' not found. Downloading...")
            download("en_core_web_sm")
            self.nlp = spacy.load("en_core_web_sm")

    def extract_entities(self, text: str) -> List[Dict[str, Any]]:
        """Use spaCy to extract named entities from text."""
        doc = self.nlp(text)
        entities = [{"text": ent.text, "label": ent.label_} for ent in doc.ents]
        self.logger.info(f"Extracted entities: {entities}")
        
        return entities

    def convert_to_natural_language(self, query_result: Dict[str, Any], original_query: str) -> Dict[str, Any]:
        """Convert query results to natural language using LLaMA-3 and spaCy for NER."""
        if not query_result["success"]:
            self.logger.error("Query execution failed; no results to process.")
            return {"success": False, "error": "No results to process"}

        # Format the query result data
        formatted_data = [dict(zip(query_result["columns"], row)) for row in query_result["results"]]
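        # e.g. columns ["order_id", "total_price"] with row [1001, 150]
        # becomes {"order_id": 1001, "total_price": 150}.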
        
        # Convert formatted data to a string for entity extraction
        formatted_text = "\n".join([str(row) for row in formatted_data])

        # Extract named entities from the query result
        entities = self.extract_entities(formatted_text)

        # Prepare system and user prompts
        system_prompt = (
            "You are a data interpreter that uses named entities to create a clear, natural language explanation. "
            "Your job is to make sense of the given entities, summarize key insights, and answer the original question."
        )

        # Include the original query and the extracted entities in the user
        # prompt; built by concatenation so no stray indentation is sent.
        user_prompt = (
            f"Original question: {original_query}\n"
            f"Extracted Entities:\n{json.dumps(entities, indent=2)}\n"
            f"Data Summary:\n{formatted_text}\n"
            "Based on this information, generate a natural language "
            "explanation of the query results."
        )

        try:
            # Prepare the payload to send to the API
            payload = {
                "model": "llama3-8b-8192",  # Adjust model name if necessary
                "messages": [
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ],
                "max_tokens": 500,
                "temperature": 0.3
            }

            # Send the request to the API
            response = requests.post(self.api_url, headers=self.headers, json=payload, timeout=30)
            response.raise_for_status()
            
            result = response.json()
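            # OpenAI-compatible schema: the generated text lives at
            # choices[0].message.content on success.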
            if 'choices' in result and result['choices']:
                explanation = result['choices'][0]['message']['content'].strip()
                self.logger.info(f"Generated natural language explanation: {explanation}")
                return {"success": True, "explanation": explanation}
            
            return {"success": False, "error": "Failed to generate explanation"}

        except requests.exceptions.RequestException as e:
            self.logger.error(f"API request error: {e}")
            return {"success": False, "error": f"API request failed: {e}"}
        except (KeyError, ValueError) as e:
            # response.json() raises a ValueError subclass on non-JSON bodies;
            # indexing into an unexpected payload raises KeyError.
            self.logger.error(f"Malformed API response: {e}")
            return {"success": False, "error": f"Malformed API response: {e}"}

# Example Usage
if __name__ == "__main__":
    api_key = "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"
    query_result = {
        "success": True,
        "columns": ["order_id", "total_price", "order_date"],
        "results": [
            [1001, 150, "2024-10-01"],
            [1002, 200, "2024-10-02"]
        ]
    }
    original_query = "Show me the orders with total price greater than 100"
    
    nl_converter = NLConverter(api_key)
    result = nl_converter.convert_to_natural_language(query_result, original_query)
    if result["success"]:
        print(result["explanation"])
    else:
        print(f"Error: {result['error']}")