File size: 5,858 Bytes
1f5812b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 |
import streamlit as st
from transformers import pipeline
import requests
import pandas as pd
import re
# Agent Classes
class UseCaseAgent:
def __init__(self):
"""Agent to generate AI/ML use cases."""
self.generator = pipeline("text-generation", model="gpt2")
def generate_use_cases(self, industry, trends):
"""Generate 3 use cases with a brief debrief based on industry and trends."""
prompt = (
f"Industry: {industry}\n"
f"Trends: {trends}\n"
f"Suggest 3 AI/ML/GenAI use cases with a brief debrief for each to improve operations and customer satisfaction:"
"\n1. "
)
result = self.generator(prompt, max_length=300, num_return_sequences=1)
use_cases = result[0]["generated_text"]
# Format the output into a list by extracting each line that starts with a number
use_case_list = re.findall(r'\d+\.\s*(.*?)(?:\n|$)', use_cases)
# Limit the use cases to 3
return use_case_list[:3]
class ResourceAgent:
def __init__(self):
"""Agent to search and retrieve datasets."""
pass
def search_huggingface(self, query):
"""Search datasets on HuggingFace."""
hf_url = f"https://huggingface.co/api/models?search={query}"
response = requests.get(hf_url)
return response.json()[:5] if response.status_code == 200 else []
def search_kaggle(self, query):
"""Search datasets on Kaggle."""
kaggle_url = f"https://www.kaggle.com/api/v1/datasets/list?search={query}"
response = requests.get(kaggle_url)
return response.json()[:5] if response.status_code == 200 else []
# Multi-Agent System
class MultiAgentSystem:
def __init__(self):
self.use_case_agent = UseCaseAgent()
self.resource_agent = ResourceAgent()
def process_query(self, industry_query, trends_query):
"""End-to-end query processing."""
use_cases = self.use_case_agent.generate_use_cases(industry_query, trends_query)
return use_cases
def fetch_datasets(self, use_cases):
"""Fetch relevant datasets based on generated use cases."""
keywords = self.extract_keywords(use_cases)
datasets = {}
for keyword in keywords:
hf_datasets = self.resource_agent.search_huggingface(keyword)
kaggle_datasets = self.resource_agent.search_kaggle(keyword)
datasets[keyword] = {
"huggingface": hf_datasets,
"kaggle": kaggle_datasets
}
return datasets
def extract_keywords(self, use_cases):
"""Extract relevant keywords from use cases for dataset search."""
# Simple keyword extraction: split by spaces and take the first two words as keywords
keywords = set()
for use_case in use_cases:
words = re.findall(r'\w+', use_case)
if words:
keywords.add(words[0]) # For simplicity, take the first word as a keyword
return list(keywords)
# Streamlit UI
def run_streamlit_ui():
st.title("Market Research & AI Use Case Generator")
st.write("Generate actionable insights and find relevant datasets.")
mas = MultiAgentSystem()
# Trends and Use Case Generation
st.header("AI/ML Use Case Generation")
industry_query = st.text_input("Enter industry/company:")
st.caption("Example: Automotive, Retail, Healthcare, etc.")
trends_query = st.text_input("Enter industry trends or focus areas:")
st.caption("Example: Supply chain optimization, Customer experience, etc.")
# Store use cases in session state
if "use_cases" not in st.session_state:
st.session_state["use_cases"] = []
if st.button("Generate Use Cases"):
with st.spinner("Generating insights..."):
st.session_state["use_cases"] = mas.process_query(industry_query, trends_query)
st.subheader("Proposed Use Cases")
for i, use_case in enumerate(st.session_state["use_cases"], start=1):
st.write(f"**Use Case {i}:** {use_case}")
# Add a button to search for relevant datasets
if st.session_state["use_cases"]:
st.subheader("Search for Relevant Datasets")
if st.button("Search Datasets"):
with st.spinner("Searching datasets..."):
datasets = mas.fetch_datasets(st.session_state["use_cases"])
for keyword, dataset_info in datasets.items():
st.write(f"### Datasets related to: {keyword}")
# HuggingFace Datasets
st.subheader("HuggingFace Datasets")
if dataset_info["huggingface"]:
for dataset in dataset_info["huggingface"]:
dataset_id = dataset.get('modelId', 'Unknown ID')
dataset_url = f"https://huggingface.co/models/{dataset_id}"
st.write(f"- [{dataset_id}]({dataset_url})")
else:
st.write("No relevant datasets found on HuggingFace.")
# Kaggle Datasets
st.subheader("Kaggle Datasets")
if dataset_info["kaggle"]:
for dataset in dataset_info["kaggle"]:
dataset_title = dataset.get('title', 'Unknown Title')
dataset_url = dataset.get('url', '#')
st.write(f"- [{dataset_title}]({dataset_url})")
else:
st.write("No relevant datasets found on Kaggle.")
if __name__ == "__main__":
run_streamlit_ui()
|