Anuj02003 committed on
Commit
1f5812b
·
verified ·
1 Parent(s): 29122db

Upload 2 files

Browse files
Files changed (2) hide show
  1. assignment.py +142 -0
  2. requirements.txt +4 -0
assignment.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import pipeline
3
+ import requests
4
+ import pandas as pd
5
+ import re
6
+
7
+ # Agent Classes
8
+ class UseCaseAgent:
9
+ def __init__(self):
10
+ """Agent to generate AI/ML use cases."""
11
+ self.generator = pipeline("text-generation", model="gpt2")
12
+
13
+ def generate_use_cases(self, industry, trends):
14
+ """Generate 3 use cases with a brief debrief based on industry and trends."""
15
+ prompt = (
16
+ f"Industry: {industry}\n"
17
+ f"Trends: {trends}\n"
18
+ f"Suggest 3 AI/ML/GenAI use cases with a brief debrief for each to improve operations and customer satisfaction:"
19
+ "\n1. "
20
+ )
21
+ result = self.generator(prompt, max_length=300, num_return_sequences=1)
22
+ use_cases = result[0]["generated_text"]
23
+
24
+ # Format the output into a list by extracting each line that starts with a number
25
+ use_case_list = re.findall(r'\d+\.\s*(.*?)(?:\n|$)', use_cases)
26
+
27
+ # Limit the use cases to 3
28
+ return use_case_list[:3]
29
+
30
+
31
class ResourceAgent:
    """Agent that queries public hub search APIs for related resources."""

    # NOTE(review): this is the Hub *models* listing endpoint even though the
    # method is described as a dataset search; the UI reads 'modelId' from
    # the results, so the endpoint is kept as-is. Confirm the intent.
    _HF_SEARCH_URL = "https://huggingface.co/api/models"

    # NOTE(review): Kaggle's API normally expects credentials; without them
    # this presumably returns a non-200 status and therefore an empty list.
    _KAGGLE_SEARCH_URL = "https://www.kaggle.com/api/v1/datasets/list"

    # Number of results kept from each search.
    _MAX_RESULTS = 5

    # Seconds to wait for a response before giving up on a search.
    _TIMEOUT_SECONDS = 10

    def __init__(self):
        """Create the agent; it keeps no per-instance state."""

    def search_huggingface(self, query):
        """Search HuggingFace for *query*.

        Args:
            query: Free-text search term.

        Returns:
            Up to 5 result dicts as returned by the API, or an empty list if
            the query is blank or the request fails.
        """
        return self._search(self._HF_SEARCH_URL, query)

    def search_kaggle(self, query):
        """Search datasets on Kaggle for *query*.

        Args:
            query: Free-text search term.

        Returns:
            Up to 5 result dicts as returned by the API, or an empty list if
            the query is blank or the request fails.
        """
        return self._search(self._KAGGLE_SEARCH_URL, query)

    def _search(self, url, query):
        """Run one best-effort search and return at most 5 results."""
        # A blank query would just list arbitrary items; skip the request.
        if not query or not query.strip():
            return []

        try:
            # Passing the term via ``params`` lets requests URL-encode it, so
            # spaces, '&' or '#' in the query cannot corrupt the URL.
            response = requests.get(
                url,
                params={"search": query},
                timeout=self._TIMEOUT_SECONDS,
            )
        except requests.RequestException:
            # Searches are best-effort: a network failure means "no results"
            # rather than an exception bubbling up to the caller.
            return []

        if response.status_code != 200:
            return []

        try:
            payload = response.json()
        except ValueError:
            # Body was not valid JSON (e.g. an HTML error page).
            return []

        # Error payloads can be JSON objects; only a list can be sliced.
        if not isinstance(payload, list):
            return []
        return payload[: self._MAX_RESULTS]
47
+
48
+
49
+ # Multi-Agent System
50
class MultiAgentSystem:
    """Ties the use-case generator and the resource search agent together."""

    # Common filler words that make poor search terms on their own.
    _STOPWORDS = frozenset({
        "a", "an", "the", "to", "of", "for", "in", "on", "at", "by", "as",
        "and", "or", "with", "from", "into", "using", "use", "be", "is",
        "are", "this", "that", "it", "we", "our", "you", "your", "can",
        "will",
    })

    def __init__(self):
        """Create the use-case and resource agents."""
        self.use_case_agent = UseCaseAgent()
        self.resource_agent = ResourceAgent()

    def process_query(self, industry_query, trends_query):
        """Generate use cases for the given industry and trends.

        Returns:
            Whatever ``UseCaseAgent.generate_use_cases`` returns for the
            two queries.
        """
        return self.use_case_agent.generate_use_cases(industry_query, trends_query)

    def fetch_datasets(self, use_cases):
        """Fetch search results for each keyword derived from *use_cases*.

        Args:
            use_cases: Iterable of use-case strings.

        Returns:
            A dict mapping each keyword to
            ``{"huggingface": [...], "kaggle": [...]}``, in the order the
            keywords were extracted.
        """
        return {
            keyword: {
                "huggingface": self.resource_agent.search_huggingface(keyword),
                "kaggle": self.resource_agent.search_kaggle(keyword),
            }
            for keyword in self.extract_keywords(use_cases)
        }

    def extract_keywords(self, use_cases):
        """Pick one search keyword per use case.

        The keyword is the first word of the use case that is not a common
        filler word ("The", "A", "Using", ...); if every word is a filler
        word, the first word is used. Keywords that differ only by case are
        reported once, and the result keeps first-seen order so repeated
        runs issue the same searches in the same order.

        Args:
            use_cases: Iterable of use-case strings.

        Returns:
            A list of unique keyword strings; use cases without any word
            characters contribute nothing.
        """
        # Keyed by lowercase form for de-duplication; dict keeps insertion order.
        unique = {}
        for use_case in use_cases:
            words = re.findall(r"\w+", use_case)
            if not words:
                continue
            keyword = next(
                (word for word in words if word.lower() not in self._STOPWORDS),
                words[0],
            )
            unique.setdefault(keyword.lower(), keyword)
        return list(unique.values())
83
+
84
+
85
+ # Streamlit UI
86
def run_streamlit_ui():
    """Render the Streamlit page for use-case generation and dataset search.

    The page has two steps: generate use cases from an industry and its
    trends, then (once use cases exist) search for related resources per
    extracted keyword. Generated use cases live in ``st.session_state`` so
    they survive the rerun triggered by the second button.
    """
    st.title("Market Research & AI Use Case Generator")
    st.write("Generate actionable insights and find relevant datasets.")

    # Streamlit re-executes this function on every interaction. Keep the
    # agent system in session state so the text-generation model is loaded
    # once per session instead of on every rerun.
    if "multi_agent_system" not in st.session_state:
        st.session_state["multi_agent_system"] = MultiAgentSystem()
    mas = st.session_state["multi_agent_system"]

    # Trends and Use Case Generation
    st.header("AI/ML Use Case Generation")
    industry_query = st.text_input("Enter industry/company:")
    st.caption("Example: Automotive, Retail, Healthcare, etc.")
    trends_query = st.text_input("Enter industry trends or focus areas:")
    st.caption("Example: Supply chain optimization, Customer experience, etc.")

    # Store use cases in session state
    if "use_cases" not in st.session_state:
        st.session_state["use_cases"] = []

    if st.button("Generate Use Cases"):
        if not industry_query.strip():
            # Generating from an empty prompt only produces noise.
            st.warning("Please enter an industry or company first.")
        else:
            with st.spinner("Generating insights..."):
                st.session_state["use_cases"] = mas.process_query(industry_query, trends_query)
            if not st.session_state["use_cases"]:
                st.warning("No use cases could be extracted from the model output. Please try again.")

    # Rendered from session state (not only right after the click) so the
    # list stays visible when "Search Datasets" triggers a rerun.
    if st.session_state["use_cases"]:
        st.subheader("Proposed Use Cases")
        for i, use_case in enumerate(st.session_state["use_cases"], start=1):
            st.write(f"**Use Case {i}:** {use_case}")

    # Add a button to search for relevant datasets
    if st.session_state["use_cases"]:
        st.subheader("Search for Relevant Datasets")
        if st.button("Search Datasets"):
            with st.spinner("Searching datasets..."):
                datasets = mas.fetch_datasets(st.session_state["use_cases"])

            for keyword, dataset_info in datasets.items():
                st.write(f"### Datasets related to: {keyword}")

                # HuggingFace Datasets
                st.subheader("HuggingFace Datasets")
                if dataset_info["huggingface"]:
                    for dataset in dataset_info["huggingface"]:
                        dataset_id = dataset.get('modelId') or dataset.get('id', 'Unknown ID')
                        # Hub repositories live at huggingface.co/<id>;
                        # "/models" is the listing page, not a path prefix.
                        dataset_url = f"https://huggingface.co/{dataset_id}"
                        st.write(f"- [{dataset_id}]({dataset_url})")
                else:
                    st.write("No relevant datasets found on HuggingFace.")

                # Kaggle Datasets
                st.subheader("Kaggle Datasets")
                if dataset_info["kaggle"]:
                    for dataset in dataset_info["kaggle"]:
                        dataset_title = dataset.get('title', 'Unknown Title')
                        dataset_url = dataset.get('url', '#')
                        st.write(f"- [{dataset_title}]({dataset_url})")
                else:
                    st.write("No relevant datasets found on Kaggle.")


if __name__ == "__main__":
    run_streamlit_ui()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ transformers
4
+ requests