# Spaces:
# Sleeping
# Sleeping
from collections import defaultdict
from io import BytesIO

import gradio as gr
import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
from PIL import Image
# Load and preprocess the dataset
file_path = "cbinsights_data.csv"  # Replace with your file path
data = pd.read_csv(file_path)

# The first row of the loaded frame holds header text rather than a company,
# so drop it and assign stable column names. `.copy()` detaches the result
# from the original frame so the column assignments below do not trigger
# pandas' chained-assignment warnings.
data = data.iloc[1:].copy()
data.columns = [
    "Company",
    "Valuation_Billions",
    "Date_Joined",
    "Country",
    "City",
    "Industry",
    "Select_Investors",
]

# Clean and prepare data.
# `regex=False` is essential: as a regular expression "$" is the end-of-string
# anchor, so on pandas versions where `regex` defaults to True the dollar sign
# would never be removed and every valuation would be coerced to NaN.
# Only the part before the first "." is kept, i.e. whole billions.
data["Valuation_Billions"] = pd.to_numeric(
    data["Valuation_Billions"]
    .str.strip()
    .str.replace("$", "", regex=False)
    .str.split(".")
    .str[0],
    errors="coerce",  # unparsable valuations become NaN instead of raising
)


def _strip_if_str(cell):
    """Return *cell* without surrounding whitespace when it is a string."""
    return cell.strip() if isinstance(cell, str) else cell


# Per-column `Series.map` replaces `DataFrame.applymap`, which is deprecated
# in recent pandas releases.
for _column in data.columns:
    data[_column] = data[_column].map(_strip_if_str)

# Parse the "Select_Investors" column to map investors to companies.
# Each cell is a comma-separated list of investor names.
_companies_by_investor = defaultdict(list)
for _company, _investors in zip(data["Company"], data["Select_Investors"]):
    if pd.isnull(_investors):
        continue
    for _name in _investors.split(","):
        _name = _name.strip()
        if _name:  # skip empty tokens produced by stray or trailing commas
            _companies_by_investor[_name].append(_company)

# Expose a plain dict so that a lookup of an unknown investor elsewhere cannot
# silently create an empty entry.
investor_company_mapping = dict(_companies_by_investor)
# Gradio app functions
def filter_investors_by_country_and_industry(
    selected_country,
    selected_industry,
    min_total_valuation=20,
    *,
    source_data=None,
    investor_mapping=None,
):
    """Return the investors whose filtered portfolio meets a valuation floor.

    The dataset is narrowed to the selected country and industry, then each
    investor's total is the sum of ``Valuation_Billions`` over their companies
    that survive the filter. A missing (NaN) valuation counts as zero so that
    one unparsable row cannot turn the whole total into NaN and drop the
    investor.

    Args:
        selected_country: Value to match in the ``Country`` column, or
            ``"All"`` to skip the country filter.
        selected_industry: Value to match in the ``Industry`` column, or
            ``"All"`` to skip the industry filter.
        min_total_valuation: Minimum summed valuation an investor needs in
            order to be returned. Defaults to 20.
        source_data: DataFrame to filter. Defaults to the module-level
            ``data``.
        investor_mapping: Mapping of investor name to a list of company
            names. Defaults to the module-level ``investor_company_mapping``.

    Returns:
        A ``(investors, filtered_data)`` tuple: the qualifying investor names
        in mapping order, and the filtered DataFrame.
    """
    filtered_data = data if source_data is None else source_data
    mapping = (
        investor_company_mapping if investor_mapping is None else investor_mapping
    )

    # Apply filters
    if selected_country != "All":
        filtered_data = filtered_data[filtered_data["Country"] == selected_country]
    if selected_industry != "All":
        filtered_data = filtered_data[filtered_data["Industry"] == selected_industry]

    # Build the company -> valuation lookup once instead of scanning the
    # DataFrame for every (investor, company) pair. `setdefault` keeps the
    # first row when a company name is duplicated.
    valuation_by_company = {}
    for company, valuation in zip(
        filtered_data["Company"], filtered_data["Valuation_Billions"]
    ):
        if pd.isna(company):
            continue
        valuation_by_company.setdefault(
            company, 0 if pd.isna(valuation) else valuation
        )

    # Keep investors whose total valuation reaches the threshold.
    qualifying_investors = []
    for investor, companies in mapping.items():
        total_valuation = sum(
            valuation_by_company.get(company, 0) for company in companies
        )
        if total_valuation >= min_total_valuation:
            qualifying_investors.append(investor)

    return qualifying_investors, filtered_data
def generate_graph(selected_investors, filtered_data):
    """Render the investor/company network as a PIL image.

    Each selected investor is linked to those of its companies (taken from the
    module-level ``investor_company_mapping``) that appear in
    ``filtered_data``. Investor nodes have a fixed size; company nodes are
    scaled by ``Valuation_Billions``.

    Args:
        selected_investors: Investor names to include in the graph.
        filtered_data: DataFrame with ``Company`` and ``Valuation_Billions``
            columns, or ``None`` when no filter has produced data yet.

    Returns:
        A ``PIL.Image`` holding the rendered PNG, or ``None`` when there are
        no selected investors or no data.
    """
    if not selected_investors or filtered_data is None:
        return None

    size_per_billion = 50  # marker area added per billion of valuation
    investor_size = 2000  # fixed size for investors
    fallback_company_size = 100  # used when a valuation is unknown
    investor_color = "#FF5733"  # distinct color for investors
    company_color = "#33FF57"  # distinct color for companies

    # One pass over the data instead of a DataFrame scan per company.
    # `setdefault` keeps the first row when a company name is duplicated.
    valuation_by_company = {}
    for company, valuation in zip(
        filtered_data["Company"], filtered_data["Valuation_Billions"]
    ):
        if pd.isna(company):
            continue
        valuation_by_company.setdefault(company, valuation)

    # Filter the investor-to-company mapping
    wanted = set(selected_investors)
    filtered_mapping = {}
    for investor, companies in investor_company_mapping.items():
        if investor not in wanted:
            continue
        present = [c for c in companies if c in valuation_by_company]
        if present:
            filtered_mapping[investor] = present

    # Use the filtered mapping to build the graph
    G = nx.Graph()
    for investor, companies in filtered_mapping.items():
        G.add_edges_from((investor, company) for company in companies)

    # Node sizes and colors, in G.nodes order as the drawing call requires.
    node_sizes = []
    node_colors = []
    for node in G.nodes:
        if node in filtered_mapping:
            node_sizes.append(investor_size)
            node_colors.append(investor_color)
            continue
        valuation = valuation_by_company.get(node)
        if valuation is None or pd.isna(valuation):
            # A NaN valuation would otherwise yield a NaN marker size.
            node_sizes.append(fallback_company_size)
        else:
            node_sizes.append(valuation * size_per_billion)
        node_colors.append(company_color)

    # Draw on an explicit figure/axes rather than pyplot's implicit "current
    # figure", and always close the figure, even if drawing or saving fails.
    fig = plt.figure(figsize=(18, 18))
    try:
        ax = fig.add_axes((0, 0, 1, 1))
        pos = nx.spring_layout(G, k=0.2, seed=42)  # fixed seed for consistent layout
        nx.draw(
            G,
            pos,
            ax=ax,
            with_labels=True,
            node_size=node_sizes,
            node_color=node_colors,
            alpha=0.8,  # slight transparency for Tufte-inspired visuals
            font_size=10,
            font_weight="bold",
            edge_color="#B0BEC5",  # neutral, muted edge color
            width=0.8,  # thin edges for minimal visual clutter
        )

        # Legend for node size (valuation); the empty scatters exist only to
        # create legend handles and use the same scale as the company nodes.
        for billions in (1, 100):
            ax.scatter(
                [],
                [],
                s=billions * size_per_billion,
                color=company_color,
                label=f"${billions}B valuation",
            )
        ax.legend(
            scatterpoints=1,
            frameon=False,
            labelspacing=1.5,
            loc="lower left",
            fontsize=12,
        )
        ax.set_title("Venture Funded Companies Visualization", fontsize=20)
        ax.axis("off")

        # Save plot to BytesIO object
        buf = BytesIO()
        fig.savefig(buf, format="png", bbox_inches="tight")
    finally:
        plt.close(fig)

    # Convert BytesIO to PIL image
    buf.seek(0)
    return Image.open(buf)
def app(selected_country, selected_industry):
    """Recompute the investor choices for the current filter selection.

    Args:
        selected_country: Country dropdown value.
        selected_industry: Industry dropdown value.

    Returns:
        A ``(update, filtered_data)`` tuple: a Gradio update that makes the
        investor checkbox group visible with every qualifying investor offered
        and pre-selected, and the filtered DataFrame to keep as state.
    """
    qualifying, subset = filter_investors_by_country_and_industry(
        selected_country, selected_industry
    )
    checkbox_update = gr.update(
        visible=True,
        choices=qualifying,
        value=qualifying,
    )
    return checkbox_update, subset
# Gradio Interface
def main():
    """Build the Gradio Blocks interface and launch it."""

    def dropdown_choices(column):
        # "All" comes first, followed by the column's distinct non-null
        # values in sorted order.
        return ["All"] + sorted(data[column].dropna().unique())

    country_list = dropdown_choices("Country")
    industry_list = dropdown_choices("Industry")

    with gr.Blocks() as demo:
        # NOTE(review): the layout nesting was reconstructed from a source
        # with flattened indentation; confirm the row is meant to hold only
        # the two dropdowns.
        with gr.Row():
            country_filter = gr.Dropdown(
                label="Filter by Country",
                choices=country_list,
                value="All",
            )
            industry_filter = gr.Dropdown(
                label="Filter by Industry",
                choices=industry_list,
                value="All",
            )

        # Created hidden with no choices; `app` returns the update that
        # fills it in and makes it visible.
        filtered_investor_list = gr.CheckboxGroup(
            label="Select Investors",
            choices=[],
            visible=False,
        )
        graph_output = gr.Image(label="Venture Network Graph", type="pil")
        filtered_data_holder = gr.State()

        # Changing either dropdown recomputes the investor list and the
        # stored filtered data from both current selections.
        for dropdown in (country_filter, industry_filter):
            dropdown.change(
                app,
                inputs=[country_filter, industry_filter],
                outputs=[filtered_investor_list, filtered_data_holder],
            )

        # Any change to the investor selection redraws the graph.
        filtered_investor_list.change(
            generate_graph,
            inputs=[filtered_investor_list, filtered_data_holder],
            outputs=graph_output,
        )

    demo.launch()


if __name__ == "__main__":
    main()