import os
import json

import dotenv
import gradio as gr
import requests
from gradio_client import Client  # assumed source of the Client used below
from langchain_openai import ChatOpenAI
from transformers import pipeline

# Choose model
model_name = "dolphin-phi"

# Load the chosen LLM model
# NOTE: transformers cannot load GGUF checkpoints directly; this assumes a
# transformers-compatible copy of the model is available under this repo id.
llm = pipeline("text-generation", model="TheBloke/dolphin-2_6-phi-2-GGUF")
# Vectara config (read from the .env file below):
# customer_id =
# corpus_id =
# api_key =

# Brought over from Tonic's Vectara example. Global dict to hold component references
components = {}

dotenv.load_dotenv()

# NOTE: gradio_client.Client normally targets a Gradio Space; this repo id is
# kept from the original code.
seamless_client = Client("TheBloke/dolphin-2_6-phi-2-GGUF")

hf_token = os.getenv("HuggingFace_Token")
base_model_id = os.getenv("BASE_MODEL_ID", "default_base_model_id")
model_directory = os.getenv("MODEL_DIRECTORY", "default_model_directory")
# DSPy-based prompt generation
# NOTE: these imports follow the original author's usage; the helper classes
# below are assumed to be available in this project's DSPy setup.
from dspy.agents import Agent  # Base class for custom agents
from dspy import spawn_processes  # Distributed computing utility
from dspy.utils import SentenceSplitter, SentimentAnalyzer, NamedEntityRecognizer

def dspy_generate_agent_prompts(prompt):
    """
    Generates prompts for different agents based on the provided prompt and DSPy functionalities.

    Args:
        prompt (str): The user-provided prompt (e.g., customer reviews).

    Returns:
        list: A list containing agent-specific prompts.
    """
    # 1. Split the prompt into individual sentences
    sentences = SentenceSplitter().process(prompt)

    # 2. Analyze sentiment for each sentence
    sentiment_analyzer = SentimentAnalyzer()
    sentiment_labels = [sentiment_analyzer.analyze(sentence) for sentence in sentences]

    # 3. Extract named entities related to specific topics
    ner = NamedEntityRecognizer(model_name="en_core_web_sm")
    extracted_entities = {}
    for sentence in sentences:
        entities = ner.process(sentence)
        for entity in entities:
            if entity.label_ in ["FOOD", "ORG", "LOCATION"]:  # Customize entity labels based on needs
                extracted_entities.setdefault(entity.label_, []).append(entity.text)

    # 4. Craft prompts for each agent
    agent_prompts = []

    # Sentiment Analyzer prompt
    sentiment_prompt = "Analyze the sentiment of the following sentences:\n" + "\n".join(sentences)
    agent_prompts.append(sentiment_prompt)

    # Topic Extractor prompt (modify based on your specific topics)
    topic_prompt = f"Extract the main topics discussed in the following text, focusing on food, service, and ambiance:\n{prompt}"
    agent_prompts.append(topic_prompt)

    # Recommendation Generator prompt (modify based on your requirements)
    positive_count = sum(label == "POSITIVE" for label in sentiment_labels)
    negative_count = sum(label == "NEGATIVE" for label in sentiment_labels)
    neutral_count = sum(label == "NEUTRAL" for label in sentiment_labels)
    topic_mentions = "\n".join(f"{k}: {','.join(v)}" for k, v in extracted_entities.items())
    recommendation_prompt = (
        f"Based on the sentiment analysis (positive: {positive_count}, "
        f"negative: {negative_count}, neutral: {neutral_count}) and extracted "
        f"topics ({topic_mentions}), suggest recommendations for the restaurant to improve."
    )
    agent_prompts.append(recommendation_prompt)

    return agent_prompts
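
# Example (illustrative; assumes the DSPy helpers above behave as described):
#   dspy_generate_agent_prompts("The pasta was great but the service was slow.")
#   -> [sentiment-analysis prompt, topic-extraction prompt, recommendation prompt]
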
def query_vectara(text):
    user_message = text

    # Read authentication parameters from the .env file
    customer_id = os.getenv('CUSTOMER_ID')
    corpus_id = os.getenv('CORPUS_ID')
    api_key = os.getenv('API_KEY')

    # Define the headers
    api_key_header = {
        "customer-id": customer_id,
        "x-api-key": api_key
    }

    # Define the request body in the structure provided in the example
    request_body = {
        "query": [
            {
                "query": user_message,
                "queryContext": "",
                "start": 1,
                "numResults": 25,
                "contextConfig": {
                    "charsBefore": 0,
                    "charsAfter": 0,
                    "sentencesBefore": 2,
                    "sentencesAfter": 2,
                    "startTag": "%START_SNIPPET%",
                    "endTag": "%END_SNIPPET%",
                },
                "rerankingConfig": {
                    "rerankerId": 272725718,
                    "mmrConfig": {
                        "diversityBias": 0.35
                    }
                },
                "corpusKey": [
                    {
                        "customerId": customer_id,
                        "corpusId": corpus_id,
                        "semantics": 0,
                        "metadataFilter": "",
                        "lexicalInterpolationConfig": {
                            "lambda": 0
                        },
                        "dim": []
                    }
                ],
                "summary": [
                    {
                        "maxSummarizedResults": 5,
                        "responseLang": "auto",
                        "summarizerPromptName": "vectara-summary-ext-v1.2.0"
                    }
                ]
            }
        ]
    }

    # Make the API request with requests
    response = requests.post(
        "https://api.vectara.io/v1/query",
        json=request_body,  # json= automatically serializes the request body
        verify=True,
        headers=api_key_header
    )

    if response.status_code == 200:
        query_data = response.json()
        if query_data:
            sources_info = []

            # Extract the summary
            summary = query_data['responseSet'][0]['summary'][0]['text']

            # Iterate over all response sets
            for response_set in query_data.get('responseSet', []):
                # Extract sources, limited to the top 5
                for source in response_set.get('response', [])[:5]:
                    source_metadata = source.get('metadata', [])
                    source_info = {}

                    for metadata in source_metadata:
                        metadata_name = metadata.get('name', '')
                        metadata_value = metadata.get('value', '')

                        if metadata_name == 'title':
                            source_info['title'] = metadata_value
                        elif metadata_name == 'author':
                            source_info['author'] = metadata_value
                        elif metadata_name == 'pageNumber':
                            source_info['page number'] = metadata_value

                    if source_info:
                        sources_info.append(source_info)

            result = {"summary": summary, "sources": sources_info}
            return f"{json.dumps(result, indent=2)}"
        else:
            return "No data found in the response."
    else:
        return f"Error: {response.status_code}"
# Define the main function to be used with Gradio
def generate_outputs(user_prompt):
    # 1. Process prompt with langchain (replaced by the DSPy function dspy_generate_agent_prompts)

    # 2. Generate synthetic data using DSPy's distributed computing capabilities
    synthetic_data = generate_synthetic_data_distributed(user_prompt)

    # 3. Combine user prompt and synthetic data
    combined_data = f"{user_prompt}\n{synthetic_data}"

    # 4. Generate prompts for agents using DSPy
    agent_prompts = dspy_generate_agent_prompts(combined_data)

    # 5. Use the chosen LLM for two of the prompts and Vectara tool use for the third agent
    #    (outputs 1-3 are computed but not yet wired into the final outputs; stand-in logic)
    output_1 = llm(agent_prompts[0], max_length=100)[0]["generated_text"]
    output_2 = llm(agent_prompts[1], max_length=100)[0]["generated_text"]
    output_3 = query_vectara(agent_prompts[2])

    # 6. Produce outputs with Langchain or DSPy (stand-in section)
    report, recommendations, visualization = produce_outputs(combined_data)
    return report, recommendations, visualization
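
# `produce_outputs` is called in `generate_outputs` above but was never defined
# in this file. A minimal sketch: the report and recommendations are produced
# with the already-loaded LLM, and the visualization is a blank placeholder
# image (Gradio's "image" output accepts a PIL image). All three stand-ins are
# assumptions to make the app runnable, not the project's actual logic.
from PIL import Image

def produce_outputs(combined_data):
    # Stand-in report: summarize the combined data with the LLM
    report = llm(f"Summarize:\n{combined_data}", max_length=100)[0]["generated_text"]
    # Stand-in recommendations: ask the LLM for improvement suggestions
    recommendations = llm(f"Suggest improvements based on:\n{combined_data}", max_length=100)[0]["generated_text"]
    # Placeholder visualization: replace with a real chart of the results
    visualization = Image.new("RGB", (512, 256), color="white")
    return report, recommendations, visualization
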
# Create the Gradio interface
gr.Interface(
    fn=generate_outputs,
    inputs=gr.Textbox(label="Enter a prompt"),
    outputs=["textbox", "textbox", "image"],
    title="Multi-Agent Prompt Processor",
    description="Processes a prompt using Langchain, DSPy, and a chosen Hugging Face LLM to generate diverse outputs.",
).launch()