|
import gradio as gr |
|
from langchain_openai import ChatOpenAI |
|
|
|
|
|
from transformers import pipeline |
|
|
|
|
|
model_name = "dolphin-phi" |
|
|
|
|
|
llm = pipeline("text-generation", model="TheBloke/dolphin-2_6-phi-2-GGUF") |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
components = {} |
|
dotenv.load_dotenv() |
|
seamless_client = Client("TheBloke/dolphin-2_6-phi-2-GGUF") |
|
HuggingFace_Token = os.getenv("HuggingFace_Token") |
|
hf_token = os.getenv("HuggingFace_Token") |
|
base_model_id = os.getenv('BASE_MODEL_ID', 'default_base_model_id') |
|
model_directory = os.getenv('MODEL_DIRECTORY', 'default_model_directory') |
|
import requests |
|
|
|
|
|
from dspy.agents import Agent |
|
from dspy import spawn_processes |
|
from dspy.utils import SentenceSplitter, SentimentAnalyzer, NamedEntityRecognizer |
|
|
|
def dspy_generate_agent_prompts(prompt):
    """
    Build three agent-specific prompts from a user prompt using DSPy utilities.

    Args:
        prompt (str): The user-provided text (e.g., customer reviews).

    Returns:
        list: [sentiment prompt, topic prompt, recommendation prompt].
    """
    # Break the raw text into individual sentences.
    sentences = SentenceSplitter().process(prompt)

    # Label each sentence's sentiment.
    analyzer = SentimentAnalyzer()
    sentiment_labels = [analyzer.analyze(sentence) for sentence in sentences]

    # Gather food/organization/location entities mentioned anywhere in the text,
    # grouped by entity label.
    ner = NamedEntityRecognizer(model_name="en_core_web_sm")
    extracted_entities = {}
    for sentence in sentences:
        for entity in ner.process(sentence):
            if entity.label_ in ["FOOD", "ORG", "LOCATION"]:
                extracted_entities.setdefault(entity.label_, []).append(entity.text)

    # Prompt 1: per-sentence sentiment analysis.
    sentiment_prompt = f"Analyze the sentiment of the following sentences:\n" + "\n".join(sentences)

    # Prompt 2: topic extraction focused on restaurant-relevant themes.
    topic_prompt = f"Extract the main topics discussed in the following text, focusing on food, service, and ambiance:\n{prompt}"

    # Prompt 3: improvement recommendations grounded in the sentiment tallies
    # and the entities extracted above.
    positive_count = sum(label == "POSITIVE" for label in sentiment_labels)
    negative_count = sum(label == "NEGATIVE" for label in sentiment_labels)
    neutral_count = sum(label == "NEUTRAL" for label in sentiment_labels)
    topic_mentions = "\n".join(f"{k}: {','.join(v)}" for k, v in extracted_entities.items())
    recommendation_prompt = f"""Based on the sentiment analysis (positive: {positive_count}, negative: {negative_count}, neutral: {neutral_count}) and extracted topics ({topic_mentions}), suggest recommendations for the restaurant to improve."""

    return [sentiment_prompt, topic_prompt, recommendation_prompt]
|
|
|
def query_vectara(text):
    """Query the Vectara search API and return a summary with source metadata.

    Args:
        text (str): The user's search query.

    Returns:
        str: Pretty-printed JSON with "summary" and up to five "sources"
        (title/author/page number where available), or a human-readable
        error/empty-result message.
    """
    # Local imports keep this function self-contained; the original file used
    # `os` and `json` without ever importing them.
    import json
    import os

    user_message = text

    # Vectara credentials come from the environment.
    customer_id = os.getenv('CUSTOMER_ID')
    corpus_id = os.getenv('CORPUS_ID')
    api_key = os.getenv('API_KEY')

    api_key_header = {
        "customer-id": customer_id,
        "x-api-key": api_key
    }

    request_body = {
        "query": [
            {
                "query": user_message,
                "queryContext": "",
                "start": 1,
                "numResults": 25,
                "contextConfig": {
                    "charsBefore": 0,
                    "charsAfter": 0,
                    "sentencesBefore": 2,
                    "sentencesAfter": 2,
                    "startTag": "%START_SNIPPET%",
                    "endTag": "%END_SNIPPET%",
                },
                # MMR reranker: diversityBias trades relevance for variety.
                "rerankingConfig": {
                    "rerankerId": 272725718,
                    "mmrConfig": {
                        "diversityBias": 0.35
                    }
                },
                "corpusKey": [
                    {
                        "customerId": customer_id,
                        "corpusId": corpus_id,
                        "semantics": 0,
                        "metadataFilter": "",
                        # lambda 0 = purely neural (no lexical) matching.
                        "lexicalInterpolationConfig": {
                            "lambda": 0
                        },
                        "dim": []
                    }
                ],
                "summary": [
                    {
                        "maxSummarizedResults": 5,
                        "responseLang": "auto",
                        "summarizerPromptName": "vectara-summary-ext-v1.2.0"
                    }
                ]
            }
        ]
    }

    # timeout added so a stalled API call cannot hang the app indefinitely.
    response = requests.post(
        "https://api.vectara.io/v1/query",
        json=request_body,
        verify=True,
        headers=api_key_header,
        timeout=30
    )

    if response.status_code == 200:
        query_data = response.json()
        if query_data:
            sources_info = []

            # Guarded extraction: a well-formed but empty response would
            # otherwise raise KeyError/IndexError here.
            try:
                summary = query_data['responseSet'][0]['summary'][0]['text']
            except (KeyError, IndexError, TypeError):
                return "No summary found in the response."

            for response_set in query_data.get('responseSet', []):

                # Only the top five hits are reported back to the caller.
                for source in response_set.get('response', [])[:5]:
                    source_metadata = source.get('metadata', [])
                    source_info = {}

                    # Metadata arrives as a list of {"name": ..., "value": ...}
                    # pairs; pick out the fields we present.
                    for metadata in source_metadata:
                        metadata_name = metadata.get('name', '')
                        metadata_value = metadata.get('value', '')

                        if metadata_name == 'title':
                            source_info['title'] = metadata_value
                        elif metadata_name == 'author':
                            source_info['author'] = metadata_value
                        elif metadata_name == 'pageNumber':
                            source_info['page number'] = metadata_value

                    if source_info:
                        sources_info.append(source_info)

            result = {"summary": summary, "sources": sources_info}
            return f"{json.dumps(result, indent=2)}"
        else:
            return "No data found in the response."
    else:
        return f"Error: {response.status_code}"
|
|
|
|
|
def generate_outputs(user_prompt):
    """Run the full multi-agent pipeline over a user prompt.

    Args:
        user_prompt (str): Raw text entered in the UI.

    Returns:
        tuple: (report, recommendations, visualization) as produced by
        `produce_outputs`.
    """
    # Augment the user's prompt with synthetic data.
    synthetic_data = generate_synthetic_data_distributed(user_prompt)
    combined_data = f"{user_prompt}\n{synthetic_data}"

    # Build per-agent prompts from the combined text.
    # (was: undefined name `processed_prompt` — NameError at runtime)
    agent_prompts = dspy_generate_agent_prompts(combined_data)

    # transformers text-generation pipelines return [{"generated_text": ...}];
    # the original indexed the dict with `combined_data`, which is never a key.
    output_1 = llm(agent_prompts[0], max_length=100)[0]["generated_text"]
    output_2 = llm(agent_prompts[1], max_length=100)[0]["generated_text"]
    # (was: undefined name `prompt` — query with the user's actual input)
    output_3 = query_vectara(user_prompt)

    # NOTE(review): output_1/2/3 are computed but unused, matching the
    # original flow — confirm whether produce_outputs should consume them.
    report, recommendations, visualization = produce_outputs(combined_data)

    return report, recommendations, visualization
|
|
|
|
|
# Wire the pipeline into a simple Gradio UI and start serving it:
# one text input in, two text panes plus an image out.
demo = gr.Interface(
    fn=generate_outputs,
    inputs=gr.Textbox(label="Enter a prompt"),
    outputs=["textbox", "textbox", "image"],
    title="Multi-Agent Prompt Processor",
    description="Processes a prompt using Langchain, DSPy, and a chosen Hugging Face LLM to generate diverse outputs.",
)
demo.launch()
|
|