Spaces:

singhtech
/

AutoTabML

Sleeping

App Files Files Community

AutoTabML / app.py

singhtech

Update app.py

157df43 verified about 1 year ago

raw

history blame

15.7 kB

	import streamlit as st
	import pandas as pd
	import os
	from crewai import Agent, Task, Crew
	from langchain_groq import ChatGroq
	import streamlit_ace as st_ace
	import traceback
	import contextlib
	import io
	from crewai_tools import FileReadTool
	import matplotlib.pyplot as plt
	import glob
	from dotenv import load_dotenv

	# Load variables from a local .env file into the process environment first,
	# so the lookup below can see a key that is defined only there.
	load_dotenv()
	# Groq API key used by initialize_llm(); None when the variable is not set.
	groq_api_key = os.environ.get("GROQ_API_KEY")


	def main():
	    """Render the Streamlit page and drive the generate/modify/debug/run loop.

	    On every Streamlit rerun this rebuilds the LLM client and the agents,
	    re-reads the optional CSV upload, and then reacts to the pressed button:

	    * Process - run the full crew and store the generated code.
	    * Modify / Debug - revise the stored code from a suggestion / error text.
	    * Run - execute the stored code in-process and show stdout and plots.

	    The current code and a "code has been generated" flag are kept in
	    ``st.session_state`` so that they survive reruns.
	    """
	    set_custom_css()

	    # Session defaults: the code being edited and whether any code exists yet.
	    if 'edited_code' not in st.session_state:
	        st.session_state['edited_code'] = ""
	    if 'code_generated' not in st.session_state:
	        st.session_state['code_generated'] = False

	    # Page header
	    st.markdown("""
	<div class="header">
	<h1>CrewAI Machine Learning Assistant</h1>
	<p>Your AI-powered partner for machine learning projects.</p>
	</div>
	""", unsafe_allow_html=True)

	    # Sidebar: model choice
	    st.sidebar.title('Customization')
	    model = st.sidebar.selectbox(
	        'Choose a model',
	        ['llama3-8b-8192', "llama3-70b-8192"]
	    )
	    llm = initialize_llm(model)

	    # User inputs
	    user_question = st.text_area("Describe your ML problem:", key="user_question")
	    uploaded_file = st.file_uploader("Upload a sample .csv of your data (optional)", key="uploaded_file")
	    # Fall back to a generic name when nothing was uploaded (explicit check
	    # instead of a bare except around the attribute access).
	    file_name = uploaded_file.name if uploaded_file is not None else "dataset.csv"

	    agents = initialize_agents(llm, file_name)

	    # Parse the upload. df/data_upload are initialised up front so that a
	    # failed read leaves them defined instead of raising NameError later.
	    df = None
	    data_upload = False
	    if uploaded_file is not None:
	        try:
	            df = pd.read_csv(uploaded_file)
	        except Exception as e:
	            st.error(f"Error reading the file: {e}")
	        else:
	            st.write("Data successfully uploaded:")
	            st.dataframe(df.head())
	            data_upload = True

	    # Process: full pipeline, produces the first version of the code.
	    if st.button('Process'):
	        tasks = create_tasks("Process", user_question, file_name, data_upload, df, None, st.session_state['edited_code'], None, agents)
	        code = _kickoff_crew(agents, tasks, 'Processing...')
	        if code is not None:
	            st.session_state['edited_code'] = code
	            st.session_state['code_generated'] = True
	        _show_code_editor()

	    if st.session_state['code_generated']:
	        # Modify: needs both existing code and a suggestion, otherwise no-op.
	        suggestion = st.text_area("Suggest modifications to the generated code (optional):", key="suggestion")
	        if st.button('Modify') and st.session_state['edited_code'] and suggestion:
	            tasks = create_tasks("Modify", user_question, file_name, data_upload, df, suggestion, st.session_state['edited_code'], None, agents)
	            code = _kickoff_crew(agents, tasks, 'Modifying code...')
	            if code is not None:
	                st.session_state['edited_code'] = code
	            st.write("Modified code:")
	            _show_code_editor()

	        # Debug: needs both existing code and an error message, otherwise no-op.
	        debugger = st.text_area("Paste error message here for debugging (optional):", key="debugger")
	        if st.button('Debug') and st.session_state['edited_code'] and debugger:
	            tasks = create_tasks("Debug", user_question, file_name, data_upload, df, None, st.session_state['edited_code'], debugger, agents)
	            code = _kickoff_crew(agents, tasks, 'Debugging code...')
	            if code is not None:
	                st.session_state['edited_code'] = code
	            st.write("Debugged code:")
	            _show_code_editor()

	        if st.button('Run'):
	            _run_generated_code(df)

	    # Sidebar: download buttons for every regular file under Output/.
	    with st.sidebar:
	        st.header('Output Files:')
	        for file in sorted(glob.glob(os.path.join("Output/", '*'))):
	            if os.path.isfile(file):
	                with open(file, 'rb') as f:
	                    st.download_button(label=f'Download {os.path.basename(file)}', data=f, file_name=os.path.basename(file))


	def _extract_code(raw):
	    """Return the source code inside an LLM reply, without Markdown fences.

	    Takes the content of the first ``` fenced block (dropping an optional
	    ``python``/``py`` language tag on the opening fence line). A reply with
	    no fence is returned stripped of surrounding whitespace.
	    """
	    fence = "```"
	    text = str(raw).strip()
	    start = text.find(fence)
	    if start == -1:
	        return text
	    body = text[start + len(fence):]
	    # The rest of the opening fence line is either empty or a language tag.
	    first_line, newline, rest = body.partition("\n")
	    if newline and first_line.strip().lower() in ("", "python", "python3", "py"):
	        body = rest
	    end = body.find(fence)
	    if end != -1:
	        body = body[:end]
	    return body.strip("\n")


	def _kickoff_crew(agents, tasks, spinner_text):
	    """Run all agents on *tasks* behind a spinner; return the code or None.

	    None means the crew produced no (truthy) result, in which case the
	    caller keeps whatever code is already stored.
	    """
	    with st.spinner(spinner_text):
	        crew = Crew(
	            agents=list(agents.values()),
	            tasks=tasks,
	            verbose=2
	        )
	        result = crew.kickoff()
	    if not result:
	        return None
	    return _extract_code(result)


	def _show_code_editor():
	    """Show the stored code in an Ace editor and store what the editor returns."""
	    st.session_state['edited_code'] = st_ace.st_ace(
	        value=st.session_state['edited_code'],
	        language='python',
	        theme='monokai',
	        keybinding='vscode',
	        min_lines=20,
	        max_lines=50
	    )


	def _run_generated_code(df):
	    """Execute the stored code with *df* exposed as ``dataset`` and report the outcome."""
	    final_code = st.session_state["edited_code"]
	    with st.expander("Final Code"):
	        st.code(final_code, language='python')

	    captured = io.StringIO()
	    # SECURITY NOTE(review): this exec()s LLM-generated, user-editable code
	    # inside the app process with the module globals as its namespace. That
	    # is only acceptable in a trusted or sandboxed deployment.
	    globals().update({'dataset': df})
	    try:
	        with contextlib.redirect_stdout(captured):
	            exec(final_code, globals())
	    except Exception:
	        success = False
	        # Keep what was printed before the failure and add the traceback, so
	        # the user has something useful to paste into the debugging box.
	        result = captured.getvalue() + traceback.format_exc()
	    else:
	        success = True
	        result = captured.getvalue()

	    st.subheader('Output:')
	    st.text(result)

	    # Collect open figures through the public pyplot API and close each one
	    # after rendering; otherwise figures pile up across reruns and old plots
	    # are shown again on the next run.
	    figs = [plt.figure(num) for num in plt.get_fignums()]
	    if figs:
	        st.subheader('Generated Plots:')
	        for fig in figs:
	            st.pyplot(fig)
	            plt.close(fig)

	    if success:
	        st.success("Code executed successfully!")
	    else:
	        st.error("Code execution failed! Waiting for debugging input...")



	# Function to set custom CSS for futuristic UI
	def set_custom_css():
	    """Inject the page stylesheet (dark body, gradient header, purple buttons)."""
	    # Raw <style> block; unsafe_allow_html is required for Streamlit to emit it.
	    stylesheet = """
	<style>
	body {
	background: #0e0e0e;
	color: #e0e0e0;
	font-family: 'Roboto', sans-serif;
	}
	.header {
	background: linear-gradient(135deg, #6e3aff, #b839ff);
	padding: 10px;
	border-radius: 10px;
	}
	.header h1, .header p {
	color: white;
	text-align: center;
	}
	.stButton button {
	background-color: #b839ff;
	color: white;
	border-radius: 10px;
	font-size: 16px;
	padding: 10px 20px;
	}
	.stButton button:hover {
	background-color: #6e3aff;
	color: #e0e0e0;
	}
	.spinner {
	display: flex;
	justify-content: center;
	align-items: center;
	}
	</style>
	"""
	    st.markdown(stylesheet, unsafe_allow_html=True)

	# Function to initialize LLM
	def initialize_llm(model):
	    """Return a ChatGroq client for the Groq model named *model*.

	    Temperature is fixed at 0 and the key comes from the module-level
	    ``groq_api_key``.
	    """
	    client_settings = {
	        "temperature": 0,
	        "groq_api_key": groq_api_key,
	        "model_name": model,
	    }
	    return ChatGroq(**client_settings)

	# Function to initialize agents
	def initialize_agents(llm,file_name):
	    """Create every agent used by the app, keyed by agent name.

	    All agents share ``verbose=True``, ``allow_delegation=False`` and the given
	    *llm*; only the data reader additionally receives a file-reading tool.
	    *file_name* is embedded in the starter-code generator's goal. The dict
	    preserves the construction order below.
	    """
	    file_read_tool = FileReadTool()
	    common = {"verbose": True, "allow_delegation": False, "llm": llm}

	    # (dict key, role, goal, backstory, extra keyword arguments)
	    # A 'Code_Runner_Agent' entry used to sit before the debugger; it is disabled.
	    blueprints = [
	        (
	            "Data_Reader_Agent",
	            'Data_Reader_Agent',
	            "Read the uploaded dataset and provide it to other agents.",
	            "Responsible for reading the uploaded dataset.",
	            {"tools": [file_read_tool]},
	        ),
	        (
	            "Problem_Definition_Agent",
	            'Problem_Definition_Agent',
	            "Clarify the machine learning problem the user wants to solve.",
	            "Expert in defining machine learning problems.",
	            {},
	        ),
	        (
	            "EDA_Agent",
	            'EDA_Agent',
	            "Perform all possible Exploratory Data Analysis (EDA) on the data provided by the user.",
	            "Specializes in conducting comprehensive EDA to understand the data characteristics, distributions, and relationships.",
	            {},
	        ),
	        (
	            "Feature_Engineering_Agent",
	            'Feature_Engineering_Agent',
	            "Perform feature engineering on the data based on the EDA results provided by the EDA agent.",
	            "Expert in deriving new features, transforming existing features, and preprocessing data to prepare it for modeling.",
	            {},
	        ),
	        (
	            "Model_Recommendation_Agent",
	            'Model_Recommendation_Agent',
	            "Suggest the most suitable machine learning models.",
	            "Expert in recommending machine learning algorithms.",
	            {},
	        ),
	        (
	            "Starter_Code_Generator_Agent",
	            'Starter_Code_Generator_Agent',
	            f"Generate starter Python code for the project. Always give dataset name as {file_name}",
	            "Code wizard for generating starter code templates.",
	            {},
	        ),
	        (
	            "Code_Modification_Agent",
	            'Code_Modification_Agent',
	            "Modify the generated Python code based on user suggestions.",
	            "Expert in adapting code according to user feedback.",
	            {},
	        ),
	        (
	            "Code_Debugger_Agent",
	            'Code_Debugger_Agent',
	            "Debug the generated Python code.",
	            "Seasoned code debugger.",
	            {},
	        ),
	        (
	            "Compiler_Agent",
	            "Code_compiler",
	            "Extract only the python code.",
	            "You are the compiler which extract only the python code.",
	            {},
	        ),
	    ]

	    registry = {}
	    for key, role, goal, backstory, extras in blueprints:
	        registry[key] = Agent(role=role, goal=goal, backstory=backstory, **common, **extras)
	    return registry

	# Function to create tasks based on user inputs
	def create_tasks(func_call,user_question,file_name, data_upload, df, suggestion, edited_code, debugger, agents):
	    """Build the ordered list of CrewAI tasks for one workflow stage.

	    Args:
	        func_call: Stage name - "Process", "Modify" or "Debug". Any other
	            value yields only the final code-extraction task.
	        user_question: The user's description of the ML problem.
	        file_name: Name of the dataset file, quoted in the EDA prompt.
	        data_upload: True when a dataset was uploaded and parsed.
	        df: The uploaded DataFrame, or None when no data is available.
	        suggestion: Requested change (Modify stage only).
	        edited_code: Current code to modify or debug.
	        debugger: Error message to fix (Debug stage only).
	        agents: Mapping of agent name to Agent; must contain the keys used below.

	    Returns:
	        A list of Task objects that always ends with the code-extraction task.
	    """
	    # The upload is optional, so df may be None: derive everything that is
	    # read from the frame once, with safe fallbacks.
	    has_data = df is not None
	    columns = df.columns.tolist() if has_data else []
	    info = _dataframe_info(df) if has_data else "No dataset was provided."

	    tasks = []
	    if func_call == "Process":
	        tasks.append(
	            Task(
	                description=f"Clarify the ML problem: {user_question}",
	                agent=agents["Problem_Definition_Agent"],
	                expected_output="A clear and concise definition of the ML problem."
	            )
	        )

	        if data_upload and has_data:
	            tasks.extend([
	                Task(
	                    description=f"Evaluate the data provided by the file name {file_name}. This is the data: {df}",
	                    agent=agents["EDA_Agent"],
	                    expected_output="An assessment of the EDA and preprocessing like dataset info, missing value, duplication, outliers etc. on the data provided"
	                ),
	                Task(
	                    description=f"Feature Engineering on data {df} based on EDA output: {info}",
	                    agent=agents["Feature_Engineering_Agent"],
	                    expected_output="An assessment of the Featuring Engineering and preprocessing like handling missing values, handling duplication, handling outliers, feature encoding, feature scaling etc. on the data provided"
	                )
	            ])

	        tasks.extend([
	            Task(
	                description="Suggest suitable ML models.",
	                agent=agents["Model_Recommendation_Agent"],
	                expected_output="A list of suitable ML models."
	            ),
	            Task(
	                description=f"Generate starter Python code based on feature engineering, where column names are {columns}. Generate only the code without any extra text",
	                agent=agents["Starter_Code_Generator_Agent"],
	                expected_output="Starter Python code."
	            ),
	        ])

	    if func_call == "Modify" and suggestion:
	        tasks.append(
	            Task(
	                description=f"Modify the already generated code {edited_code} according to the suggestion: {suggestion} \n\n Do not generate entire new code.",
	                agent=agents["Code_Modification_Agent"],
	                expected_output="Modified code."
	            )
	        )

	    if func_call == "Debug" and debugger:
	        tasks.append(
	            Task(
	                description=f"Debug and fix any errors for data with column names {columns} with data as {df} in the generated code: {edited_code} \n\n According to the debugging: {debugger}. \n\n Do not generate entire new code. Just remove the error in the code by modifying only necessary parts of the code.",
	                agent=agents["Code_Debugger_Agent"],
	                expected_output="Debugged and successfully executed code."
	            )
	        )

	    # Every stage ends with the extraction step so the crew's final output
	    # is code only.
	    tasks.append(
	        Task(
	            description="Your job is to only extract python code from string",
	            agent=agents["Compiler_Agent"],
	            expected_output="Running python code."
	        )
	    )

	    return tasks


	def _dataframe_info(df):
	    """Return the text of ``df.info()`` as a string.

	    ``DataFrame.info`` writes its summary to a buffer and returns None, so
	    the summary has to be captured rather than taken from the return value.
	    """
	    buffer = io.StringIO()
	    df.info(buf=buffer)
	    return buffer.getvalue()

	# Script entry point: start the app only when this file is executed directly,
	# not when it is imported as a module.
	if __name__ == "__main__":
	main()