import os
import json

import dotenv
import requests
import gradio as gr
from gradio_client import Client  # assumed source of the Client used below
from langchain_openai import ChatOpenAI  # currently unused; kept for the langchain step stubbed out in generate_outputs
from transformers import pipeline

# Choose model
model_name = "dolphin-phi"

# Load the chosen LLM model.
# NOTE: this repo hosts GGUF weights; transformers' pipeline may not load it
# directly and could require a GGUF-aware loader instead.
llm = pipeline("text-generation", model="TheBloke/dolphin-2_6-phi-2-GGUF")

# Vectara config (values are read from the .env file inside query_vectara):
# customer_id =
# corpus_id =
# api_key =

# Adapted from Tonic's Vectara example. Global variables to hold component references.
components = {}
dotenv.load_dotenv()
seamless_client = Client("TheBloke/dolphin-2_6-phi-2-GGUF")
hf_token = os.getenv("HuggingFace_Token")
base_model_id = os.getenv('BASE_MODEL_ID', 'default_base_model_id')
model_directory = os.getenv('MODEL_DIRECTORY', 'default_model_directory')

# DSPy-based prompt generation.
# NOTE: these module paths assume a DSPy build that exposes Agent,
# spawn_processes, and the text utilities below; they are not part of
# every published dspy release.
from dspy.agents import Agent
from dspy import spawn_processes
from dspy.utils import SentenceSplitter, SentimentAnalyzer, NamedEntityRecognizer

def dspy_generate_agent_prompts(prompt):
    """
    Generates prompts for different agents based on the provided prompt and DSPy functionalities.

    Args:
        prompt (str): The user-provided prompt (e.g., customer reviews).

    Returns:
        list: A list containing agent-specific prompts.
    """

    # 1. Split the prompt into individual sentences
    sentences = SentenceSplitter().process(prompt)

    # 2. Analyze sentiment for each sentence
    sentiment_analyzer = SentimentAnalyzer()
    sentiment_labels = []
    for sentence in sentences:
        sentiment_labels.append(sentiment_analyzer.analyze(sentence))

    # 3. Extract named entities related to specific topics
    ner = NamedEntityRecognizer(model_name="en_core_web_sm")
    extracted_entities = {}
    for sentence in sentences:
        entities = ner.process(sentence)
        for entity in entities:
            if entity.label_ in ["FOOD", "ORG", "LOCATION"]:  # Customize entity labels based on needs
                extracted_entities.setdefault(entity.label_, []).append(entity.text)

    # 4. Craft prompts for each agent
    agent_prompts = []

    # **Sentiment Analyzer Prompt:**
    sentiment_prompt = f"Analyze the sentiment of the following sentences:\n" + "\n".join(sentences)
    agent_prompts.append(sentiment_prompt)

    # **Topic Extractor Prompt:** (Modify based on your specific topics)
    topic_prompt = f"Extract the main topics discussed in the following text, focusing on food, service, and ambiance:\n{prompt}"
    agent_prompts.append(topic_prompt)

    # **Recommendation Generator Prompt:** (Modify based on your requirements)
    positive_count = sum(label == "POSITIVE" for label in sentiment_labels)
    negative_count = sum(label == "NEGATIVE" for label in sentiment_labels)
    neutral_count = sum(label == "NEUTRAL" for label in sentiment_labels)
    topic_mentions = "\n".join(f"{k}: {','.join(v)}" for k, v in extracted_entities.items())

    recommendation_prompt = f"""Based on the sentiment analysis (positive: {positive_count}, negative: {negative_count}, neutral: {neutral_count}) and extracted topics ({topic_mentions}), suggest recommendations for the restaurant to improve."""
    agent_prompts.append(recommendation_prompt)

    return agent_prompts
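
# generate_outputs below calls generate_synthetic_data_distributed, which this
# file never defines. A minimal sketch follows. ASSUMPTION: spawn_processes
# maps a callable over a list of inputs across worker processes; if your DSPy
# build exposes a different signature, adapt the call accordingly.
def generate_synthetic_data_distributed(user_prompt, num_workers=4):
    """Sketch: fan synthetic-data generation out over DSPy worker processes."""
    # Each worker gets the same illustrative instruction; real chunking and
    # prompting strategies would vary per use case.
    worker_inputs = [f"Write a synthetic variation of: {user_prompt}"] * num_workers
    worker_outputs = spawn_processes(
        lambda chunk: llm(chunk, max_length=100)[0]['generated_text'],
        worker_inputs,
    )
    return "\n".join(worker_outputs)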

def query_vectara(text):
    user_message = text

    # Read authentication parameters from the .env file
    customer_id = os.getenv('CUSTOMER_ID')
    corpus_id = os.getenv('CORPUS_ID')
    api_key = os.getenv('API_KEY')

    # Define the headers
    api_key_header = {
        "customer-id": customer_id,
        "x-api-key": api_key
    }

    # Define the request body in the structure provided in the example
    request_body = {
        "query": [
            {
                "query": user_message,
                "queryContext": "",
                "start": 1,
                "numResults": 25,
                "contextConfig": {
                    "charsBefore": 0,
                    "charsAfter": 0,
                    "sentencesBefore": 2,
                    "sentencesAfter": 2,
                    "startTag": "%START_SNIPPET%",
                    "endTag": "%END_SNIPPET%",
                },
                "rerankingConfig": {
                    "rerankerId": 272725718,
                    "mmrConfig": {
                        "diversityBias": 0.35
                    }
                },
                "corpusKey": [
                    {
                        "customerId": customer_id,
                        "corpusId": corpus_id,
                        "semantics": 0,
                        "metadataFilter": "",
                        "lexicalInterpolationConfig": {
                            "lambda": 0
                        },
                        "dim": []
                    }
                ],
                "summary": [
                    {
                        "maxSummarizedResults": 5,
                        "responseLang": "auto",
                        "summarizerPromptName": "vectara-summary-ext-v1.2.0"
                    }
                ]
            }
        ]
    }

    # Make the API request to the Vectara query endpoint
    response = requests.post(
        "https://api.vectara.io/v1/query",
        json=request_body,  # Use json to automatically serialize the request body
        verify=True,
        headers=api_key_header
    )

    if response.status_code == 200:
        query_data = response.json()
        if query_data:
            sources_info = []

            # Extract the summary.
            summary = query_data['responseSet'][0]['summary'][0]['text']

            # Iterate over all response sets
            for response_set in query_data.get('responseSet', []):
                # Extract sources
                # Limit to top 5 sources.
                for source in response_set.get('response', [])[:5]:
                    source_metadata = source.get('metadata', [])
                    source_info = {}

                    for metadata in source_metadata:
                        metadata_name = metadata.get('name', '')
                        metadata_value = metadata.get('value', '')

                        if metadata_name == 'title':
                            source_info['title'] = metadata_value
                        elif metadata_name == 'author':
                            source_info['author'] = metadata_value
                        elif metadata_name == 'pageNumber':
                            source_info['page number'] = metadata_value

                    if source_info:
                        sources_info.append(source_info)

            result = {"summary": summary, "sources": sources_info}
            return f"{json.dumps(result, indent=2)}"
        else:
            return "No data found in the response."
    else:
        return f"Error: {response.status_code}"

# Define the main function to be used with Gradio
def generate_outputs(user_prompt):
    # 1. The langchain preprocessing step was replaced by the DSPy prompt
    #    generation used in step 4 below.

    # 2. Generate synthetic data using DSPy's distributed computing capabilities
    synthetic_data = generate_synthetic_data_distributed(user_prompt)

    # 3. Combine user prompt and synthetic data
    combined_data = f"{user_prompt}\n{synthetic_data}"

    # 4. Generate prompts for agents using DSPy
    agent_prompts = dspy_generate_agent_prompts(combined_data)

    # 5. Use the chosen LLM for two of the prompts and the Vectara query tool for the third agent
    output_1 = llm(agent_prompts[0], max_length=100)[0]['generated_text']
    output_2 = llm(agent_prompts[1], max_length=100)[0]['generated_text']
    output_3 = query_vectara(user_prompt)

    # 6. Produce outputs with Langchain or DSPy (stand-in section)
    report, recommendations, visualization = produce_outputs(combined_data)

    return report, recommendations, visualization

# Create the Gradio interface
gr.Interface(
    fn=generate_outputs,
    inputs=gr.Textbox(label="Enter a prompt"),
    outputs=["textbox", "textbox", "image"],
    title="Multi-Agent Prompt Processor",
    description="Processes a prompt using Langchain, DSPy, and a chosen Hugging Face LLM to generate diverse outputs.",
).launch()