import os
import json

import dotenv
import gradio as gr
import requests
from gradio_client import Client  # assumed source of the Client used below
from langchain_openai import ChatOpenAI
from transformers import pipeline

# Choose model
model_name = "dolphin-phi"

# Load the chosen LLM model
# NOTE: transformers cannot load GGUF checkpoints directly; this assumes a
# transformers-compatible copy of the model is available under this repo id.
llm = pipeline("text-generation", model="TheBloke/dolphin-2_6-phi-2-GGUF")
# Vectara config (read from the .env file below):
# customer_id =
# corpus_id =
# api_key =

# Brought over from Tonic's Vectara example. Global dict to hold component references
components = {}

dotenv.load_dotenv()

# NOTE: gradio_client.Client normally targets a Gradio Space; this repo id is
# kept from the original code.
seamless_client = Client("TheBloke/dolphin-2_6-phi-2-GGUF")

hf_token = os.getenv("HuggingFace_Token")
base_model_id = os.getenv("BASE_MODEL_ID", "default_base_model_id")
model_directory = os.getenv("MODEL_DIRECTORY", "default_model_directory")
# DSPy-based prompt generation
# NOTE: these imports follow the original author's usage; the helper classes
# below are assumed to be available in this project's DSPy setup.
from dspy.agents import Agent  # Base class for custom agents
from dspy import spawn_processes  # Distributed computing utility
from dspy.utils import SentenceSplitter, SentimentAnalyzer, NamedEntityRecognizer

def dspy_generate_agent_prompts(prompt):
    """
    Generates prompts for different agents based on the provided prompt and DSPy functionalities.

    Args:
        prompt (str): The user-provided prompt (e.g., customer reviews).

    Returns:
        list: A list containing agent-specific prompts.
    """
    # 1. Split the prompt into individual sentences
    sentences = SentenceSplitter().process(prompt)

    # 2. Analyze sentiment for each sentence
    sentiment_analyzer = SentimentAnalyzer()
    sentiment_labels = [sentiment_analyzer.analyze(sentence) for sentence in sentences]

    # 3. Extract named entities related to specific topics
    ner = NamedEntityRecognizer(model_name="en_core_web_sm")
    extracted_entities = {}
    for sentence in sentences:
        entities = ner.process(sentence)
        for entity in entities:
            if entity.label_ in ["FOOD", "ORG", "LOCATION"]:  # Customize entity labels based on needs
                extracted_entities.setdefault(entity.label_, []).append(entity.text)

    # 4. Craft prompts for each agent
    agent_prompts = []

    # Sentiment Analyzer prompt
    sentiment_prompt = "Analyze the sentiment of the following sentences:\n" + "\n".join(sentences)
    agent_prompts.append(sentiment_prompt)

    # Topic Extractor prompt (modify based on your specific topics)
    topic_prompt = f"Extract the main topics discussed in the following text, focusing on food, service, and ambiance:\n{prompt}"
    agent_prompts.append(topic_prompt)

    # Recommendation Generator prompt (modify based on your requirements)
    positive_count = sum(label == "POSITIVE" for label in sentiment_labels)
    negative_count = sum(label == "NEGATIVE" for label in sentiment_labels)
    neutral_count = sum(label == "NEUTRAL" for label in sentiment_labels)
    topic_mentions = "\n".join(f"{k}: {','.join(v)}" for k, v in extracted_entities.items())
    recommendation_prompt = (
        f"Based on the sentiment analysis (positive: {positive_count}, "
        f"negative: {negative_count}, neutral: {neutral_count}) and extracted "
        f"topics ({topic_mentions}), suggest recommendations for the restaurant to improve."
    )
    agent_prompts.append(recommendation_prompt)

    return agent_prompts
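
# Example (illustrative; assumes the DSPy helpers above behave as described):
#   dspy_generate_agent_prompts("The pasta was great but the service was slow.")
#   -> [sentiment-analysis prompt, topic-extraction prompt, recommendation prompt]
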
def query_vectara(text):
    user_message = text

    # Read authentication parameters from the .env file
    customer_id = os.getenv('CUSTOMER_ID')
    corpus_id = os.getenv('CORPUS_ID')
    api_key = os.getenv('API_KEY')

    # Define the headers
    api_key_header = {
        "customer-id": customer_id,
        "x-api-key": api_key
    }

    # Define the request body in the structure provided in the example
    request_body = {
        "query": [
            {
                "query": user_message,
                "queryContext": "",
                "start": 1,
                "numResults": 25,
                "contextConfig": {
                    "charsBefore": 0,
                    "charsAfter": 0,
                    "sentencesBefore": 2,
                    "sentencesAfter": 2,
                    "startTag": "%START_SNIPPET%",
                    "endTag": "%END_SNIPPET%",
                },
                "rerankingConfig": {
                    "rerankerId": 272725718,
                    "mmrConfig": {
                        "diversityBias": 0.35
                    }
                },
                "corpusKey": [
                    {
                        "customerId": customer_id,
                        "corpusId": corpus_id,
                        "semantics": 0,
                        "metadataFilter": "",
                        "lexicalInterpolationConfig": {
                            "lambda": 0
                        },
                        "dim": []
                    }
                ],
                "summary": [
                    {
                        "maxSummarizedResults": 5,
                        "responseLang": "auto",
                        "summarizerPromptName": "vectara-summary-ext-v1.2.0"
                    }
                ]
            }
        ]
    }

    # Make the API request with requests
    response = requests.post(
        "https://api.vectara.io/v1/query",
        json=request_body,  # json= automatically serializes the request body
        verify=True,
        headers=api_key_header
    )

    if response.status_code == 200:
        query_data = response.json()
        if query_data:
            sources_info = []

            # Extract the summary
            summary = query_data['responseSet'][0]['summary'][0]['text']

            # Iterate over all response sets
            for response_set in query_data.get('responseSet', []):
                # Extract sources, limited to the top 5
                for source in response_set.get('response', [])[:5]:
                    source_metadata = source.get('metadata', [])
                    source_info = {}

                    for metadata in source_metadata:
                        metadata_name = metadata.get('name', '')
                        metadata_value = metadata.get('value', '')

                        if metadata_name == 'title':
                            source_info['title'] = metadata_value
                        elif metadata_name == 'author':
                            source_info['author'] = metadata_value
                        elif metadata_name == 'pageNumber':
                            source_info['page number'] = metadata_value

                    if source_info:
                        sources_info.append(source_info)

            result = {"summary": summary, "sources": sources_info}
            return f"{json.dumps(result, indent=2)}"
        else:
            return "No data found in the response."
    else:
        return f"Error: {response.status_code}"
# Define the main function to be used with Gradio
def generate_outputs(user_prompt):
    # 1. Process prompt with langchain (replaced by the DSPy function dspy_generate_agent_prompts)

    # 2. Generate synthetic data using DSPy's distributed computing capabilities
    synthetic_data = generate_synthetic_data_distributed(user_prompt)

    # 3. Combine user prompt and synthetic data
    combined_data = f"{user_prompt}\n{synthetic_data}"

    # 4. Generate prompts for agents using DSPy
    agent_prompts = dspy_generate_agent_prompts(combined_data)

    # 5. Use the chosen LLM for two of the prompts and Vectara tool use for the third agent
    #    (outputs 1-3 are computed but not yet wired into the final outputs; stand-in logic)
    output_1 = llm(agent_prompts[0], max_length=100)[0]["generated_text"]
    output_2 = llm(agent_prompts[1], max_length=100)[0]["generated_text"]
    output_3 = query_vectara(agent_prompts[2])

    # 6. Produce outputs with Langchain or DSPy (stand-in section)
    report, recommendations, visualization = produce_outputs(combined_data)
    return report, recommendations, visualization
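
# `produce_outputs` is called in `generate_outputs` above but was never defined
# in this file. A minimal sketch: the report and recommendations are produced
# with the already-loaded LLM, and the visualization is a blank placeholder
# image (Gradio's "image" output accepts a PIL image). All three stand-ins are
# assumptions to make the app runnable, not the project's actual logic.
from PIL import Image

def produce_outputs(combined_data):
    # Stand-in report: summarize the combined data with the LLM
    report = llm(f"Summarize:\n{combined_data}", max_length=100)[0]["generated_text"]
    # Stand-in recommendations: ask the LLM for improvement suggestions
    recommendations = llm(f"Suggest improvements based on:\n{combined_data}", max_length=100)[0]["generated_text"]
    # Placeholder visualization: replace with a real chart of the results
    visualization = Image.new("RGB", (512, 256), color="white")
    return report, recommendations, visualization
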
# Create the Gradio interface
gr.Interface(
    fn=generate_outputs,
    inputs=gr.Textbox(label="Enter a prompt"),
    outputs=["textbox", "textbox", "image"],
    title="Multi-Agent Prompt Processor",
    description="Processes a prompt using Langchain, DSPy, and a chosen Hugging Face LLM to generate diverse outputs.",
).launch()