import os
import json

import requests
import gradio as gr
from dotenv import load_dotenv
from gradio_client import Client
from langchain_openai import ChatOpenAI  # imported for the Langchain step; currently unused
from transformers import pipeline

# DSPy-based prompt generation. These utilities are assumed to be available
# in the installed dspy build; adjust the imports to match your version.
from dspy.agents import Agent  # Base class for custom agents
from dspy import spawn_processes  # Distributed computing utility
from dspy.utils import SentenceSplitter, SentimentAnalyzer, NamedEntityRecognizer

# Choose model
model_name = "dolphin-phi"

# Load the chosen LLM model.
# NOTE: this is a GGUF checkpoint; depending on your transformers version,
# loading it may require extra GGUF support or a llama.cpp-based loader.
llm = pipeline("text-generation", model="TheBloke/dolphin-2_6-phi-2-GGUF")

# Vectara config (customer_id, corpus_id, api_key) is read from the .env file
# inside query_vectara() below. Brought from the Vectara example by Tonic.

# Global variables to hold component references
components = {}

load_dotenv()

# NOTE: gradio_client.Client expects a Space; this references a model repo.
seamless_client = Client("TheBloke/dolphin-2_6-phi-2-GGUF")

hf_token = os.getenv("HuggingFace_Token")
base_model_id = os.getenv("BASE_MODEL_ID", "default_base_model_id")
model_directory = os.getenv("MODEL_DIRECTORY", "default_model_directory")
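# Optional sanity check (a minimal sketch, left commented out so the app
# starts quickly): a transformers "text-generation" pipeline returns a list
# of dicts keyed by "generated_text", which is the shape generate_outputs()
# relies on below.
#
#   sample = llm("The food was", max_length=20)
#   print(sample[0]["generated_text"])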
def dspy_generate_agent_prompts(prompt):
    """
    Generates prompts for different agents based on the provided prompt
    and DSPy functionalities.

    Args:
        prompt (str): The user-provided prompt (e.g., customer reviews).

    Returns:
        list: A list containing agent-specific prompts.
    """
    # 1. Split the prompt into individual sentences
    sentences = SentenceSplitter().process(prompt)

    # 2. Analyze sentiment for each sentence
    sentiment_analyzer = SentimentAnalyzer()
    sentiment_labels = [sentiment_analyzer.analyze(sentence) for sentence in sentences]

    # 3. Extract named entities related to specific topics
    ner = NamedEntityRecognizer(model_name="en_core_web_sm")
    extracted_entities = {}
    for sentence in sentences:
        for entity in ner.process(sentence):
            if entity.label_ in ["FOOD", "ORG", "LOCATION"]:  # Customize entity labels as needed
                extracted_entities.setdefault(entity.label_, []).append(entity.text)

    # 4. Craft prompts for each agent
    agent_prompts = []

    # Sentiment Analyzer prompt
    sentiment_prompt = "Analyze the sentiment of the following sentences:\n" + "\n".join(sentences)
    agent_prompts.append(sentiment_prompt)

    # Topic Extractor prompt (modify based on your specific topics)
    topic_prompt = (
        "Extract the main topics discussed in the following text, "
        f"focusing on food, service, and ambiance:\n{prompt}"
    )
    agent_prompts.append(topic_prompt)

    # Recommendation Generator prompt (modify based on your requirements)
    positive_count = sum(label == "POSITIVE" for label in sentiment_labels)
    negative_count = sum(label == "NEGATIVE" for label in sentiment_labels)
    neutral_count = sum(label == "NEUTRAL" for label in sentiment_labels)
    topic_mentions = "\n".join(f"{k}: {','.join(v)}" for k, v in extracted_entities.items())
    recommendation_prompt = (
        f"Based on the sentiment analysis (positive: {positive_count}, "
        f"negative: {negative_count}, neutral: {neutral_count}) and extracted "
        f"topics ({topic_mentions}), suggest recommendations for the restaurant to improve."
    )
    agent_prompts.append(recommendation_prompt)

    return agent_prompts


def query_vectara(text):
    user_message = text

    # Read authentication parameters from the .env file
    customer_id = os.getenv("CUSTOMER_ID")
    corpus_id = os.getenv("CORPUS_ID")
    api_key = os.getenv("API_KEY")

    # Define the headers
    api_key_header = {
        "customer-id": customer_id,
        "x-api-key": api_key,
    }

    # Define the request body in the structure provided in the example
    request_body = {
        "query": [
            {
                "query": user_message,
                "queryContext": "",
                "start": 1,
                "numResults": 25,
                "contextConfig": {
                    "charsBefore": 0,
                    "charsAfter": 0,
                    "sentencesBefore": 2,
                    "sentencesAfter": 2,
                    "startTag": "%START_SNIPPET%",
                    "endTag": "%END_SNIPPET%",
                },
                "rerankingConfig": {
                    "rerankerId": 272725718,
                    "mmrConfig": {"diversityBias": 0.35},
                },
                "corpusKey": [
                    {
                        "customerId": customer_id,
                        "corpusId": corpus_id,
                        "semantics": 0,
                        "metadataFilter": "",
                        "lexicalInterpolationConfig": {"lambda": 0},
                        "dim": [],
                    }
                ],
                "summary": [
                    {
                        "maxSummarizedResults": 5,
                        "responseLang": "auto",
                        "summarizerPromptName": "vectara-summary-ext-v1.2.0",
                    }
                ],
            }
        ]
    }

    # Make the API request
    response = requests.post(
        "https://api.vectara.io/v1/query",
        json=request_body,  # json= serializes the request body automatically
        verify=True,
        headers=api_key_header,
    )

    if response.status_code == 200:
        query_data = response.json()
        if query_data:
            sources_info = []

            # Extract the summary
            summary = query_data["responseSet"][0]["summary"][0]["text"]

            # Iterate over all response sets
            for response_set in query_data.get("responseSet", []):
                # Extract sources, limited to the top 5
                for source in response_set.get("response", [])[:5]:
                    source_metadata = source.get("metadata", [])
                    source_info = {}

                    for metadata in source_metadata:
                        metadata_name = metadata.get("name", "")
                        metadata_value = metadata.get("value", "")

                        if metadata_name == "title":
                            source_info["title"] = metadata_value
                        elif metadata_name == "author":
                            source_info["author"] = metadata_value
                        elif metadata_name == "pageNumber":
                            source_info["page number"] = metadata_value

                    if source_info:
                        sources_info.append(source_info)

            result = {"summary": summary, "sources": sources_info}
            return json.dumps(result, indent=2)
        else:
            return "No data found in the response."
    else:
        return f"Error: {response.status_code}"
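# --- Stand-in helpers -------------------------------------------------------
# generate_outputs() below calls generate_synthetic_data_distributed() and
# produce_outputs(), but the script never defines them. The minimal sketches
# here are assumptions (names taken from the call sites) so the app can run
# end to end; replace them with the real DSPy distributed-generation and
# report/visualization logic.

def generate_synthetic_data_distributed(user_prompt):
    # Sketch: the imports above suggest dspy's spawn_processes is meant to
    # fan this work out across processes, but its exact signature is not
    # shown in this script, so this stub simply echoes the prompt unchanged.
    return user_prompt


def produce_outputs(combined_data):
    # Sketch: return a plain-text report, a recommendations string, and a
    # placeholder visualization (None is a valid value for a gr.Image output).
    report = f"Report based on the combined input:\n{combined_data}"
    recommendations = "See the generated agent prompts for suggested improvements."
    visualization = None
    return report, recommendations, visualization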
# Define the main function to be used with Gradio
def generate_outputs(user_prompt):
    # 1. Generate synthetic data using DSPy's distributed computing
    #    capabilities (this replaces the earlier langchain processing step;
    #    see the stand-in helper above)
    synthetic_data = generate_synthetic_data_distributed(user_prompt)

    # 2. Combine user prompt and synthetic data
    combined_data = f"{user_prompt}\n{synthetic_data}"

    # 3. Generate prompts for agents using DSPy
    agent_prompts = dspy_generate_agent_prompts(combined_data)

    # 4. Use the chosen LLM for two of the prompts and Vectara tool use
    #    for the third agent
    output_1 = llm(agent_prompts[0], max_length=100)[0]["generated_text"]
    output_2 = llm(agent_prompts[1], max_length=100)[0]["generated_text"]
    output_3 = query_vectara(user_prompt)

    # 5. Produce outputs with Langchain or DSPy (stand-in section;
    #    output_1..output_3 are computed above but not yet wired into the
    #    final report)
    report, recommendations, visualization = produce_outputs(combined_data)

    return report, recommendations, visualization


# Create the Gradio interface
gr.Interface(
    fn=generate_outputs,
    inputs=gr.Textbox(label="Enter a prompt"),
    outputs=["textbox", "textbox", "image"],
    title="Multi-Agent Prompt Processor",
    description="Processes a prompt using Langchain, DSPy, and a chosen Hugging Face LLM to generate diverse outputs.",
).launch()
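# Example .env for local runs (a sketch; the values are placeholders, and the
# variable names match the os.getenv() calls above):
#
#   HuggingFace_Token=hf_xxxxxxxxxxxxxxxx
#   BASE_MODEL_ID=your-base-model-id
#   MODEL_DIRECTORY=./models
#   CUSTOMER_ID=your-vectara-customer-id
#   CORPUS_ID=your-vectara-corpus-id
#   API_KEY=your-vectara-api-key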