# Spaces: LeonceNsh / networkx-saas (status: Sleeping)
# File size: 6,407 Bytes

import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from io import BytesIO
from PIL import Image
import gradio as gr

# Load and preprocess the dataset
file_path = "cbinsights_data.csv"  # Replace with your file path
data = pd.read_csv(file_path)

# The first row of the parsed frame holds the header labels rather than data,
# so drop it and assign the canonical column names directly.
# .copy() detaches the slice from the original frame so the column
# assignments below do not raise pandas' chained-assignment warning.
data = data.iloc[1:].copy()
data.columns = ["Company", "Valuation_Billions", "Date_Joined", "Country", "City", "Industry", "Select_Investors"]

# Clean and prepare data
# regex=False: "$" has to be removed as a literal character; interpreted as a
# regular expression it is the end-of-string anchor, and the default of
# `regex` differs between pandas versions.
# NOTE: only the part before the decimal point is kept, so fractional
# billions are discarded (e.g. "$1.5" becomes 1).
data["Valuation_Billions"] = (
    data["Valuation_Billions"].str.replace("$", "", regex=False).str.split(".").str[0]
)
data["Valuation_Billions"] = pd.to_numeric(data["Valuation_Billions"], errors="coerce")

# Trim surrounding whitespace from every string cell. Series.map is used per
# column because DataFrame.applymap is deprecated in recent pandas releases.
for column in data.columns:
    data[column] = data[column].map(lambda value: value.strip() if isinstance(value, str) else value)

# Parse the "Select_Investors" column to map investors to companies
# (investor name -> list of companies whose row lists that investor).
investor_company_mapping = {}
for company, investors in zip(data["Company"], data["Select_Investors"]):
    if not isinstance(investors, str):
        # Missing (NaN) investor cell: nothing to map for this company.
        continue
    for investor in investors.split(","):
        investor = investor.strip()
        if investor:  # skip blanks produced by stray or trailing commas
            investor_company_mapping.setdefault(investor, []).append(company)

# Gradio app functions
def filter_investors_by_country_and_industry(selected_country, selected_industry, min_total_valuation=20):
    """Return the investors that qualify under the given filters, plus the filtered rows.

    Args:
        selected_country: Value to match against the "Country" column, or
            "All" to skip the country filter.
        selected_industry: Value to match against the "Industry" column, or
            "All" to skip the industry filter.
        min_total_valuation: Minimum summed "Valuation_Billions" an investor
            must reach across its companies in the filtered rows to be kept.

    Returns:
        A tuple ``(investors, filtered_data)``: the qualifying investor names
        in the iteration order of ``investor_company_mapping``, and the subset
        of the module-level ``data`` frame that passed the filters.
    """
    filtered_data = data

    # Apply filters
    if selected_country != "All":
        filtered_data = filtered_data[filtered_data["Country"] == selected_country]
    if selected_industry != "All":
        filtered_data = filtered_data[filtered_data["Industry"] == selected_industry]

    # Build the company -> valuation lookup once instead of rescanning the
    # frame for every investor/company pair. The first row wins for duplicate
    # company names. Unparseable valuations (NaN) count as 0 so a single
    # missing value cannot turn an investor's whole total into NaN and
    # silently drop that investor.
    valuation_by_company = (
        filtered_data.drop_duplicates(subset="Company")
        .set_index("Company")["Valuation_Billions"]
        .fillna(0)
        .to_dict()
    )

    # Companies outside the filtered rows contribute nothing to the total.
    qualifying_investors = [
        investor
        for investor, companies in investor_company_mapping.items()
        if sum(valuation_by_company.get(company, 0) for company in companies) >= min_total_valuation
    ]

    return qualifying_investors, filtered_data

def generate_graph(selected_investors, filtered_data):
    """Draw the investor/company network for the selected investors.

    Args:
        selected_investors: Investor names to include in the graph.
        filtered_data: Frame with "Company" and "Valuation_Billions" columns
            restricting which companies may appear.

    Returns:
        A PIL image of the rendered graph, or ``None`` when there is nothing
        to draw (no investors selected, no filtered data yet, or none of the
        selected investors has a company in ``filtered_data``).
    """
    if not selected_investors or filtered_data is None:
        return None

    # One pass over the frame: the set answers "is this company in the
    # filtered rows?", the dict gives its valuation (first row wins for
    # duplicate names). Rows with a missing valuation are left out of the
    # dict so those companies fall back to the default node size instead of
    # producing a NaN size.
    unique_rows = filtered_data.drop_duplicates(subset="Company")
    visible_companies = set(unique_rows["Company"].dropna())
    valuation_by_company = (
        unique_rows.dropna(subset=["Valuation_Billions"])
        .set_index("Company")["Valuation_Billions"]
        .to_dict()
    )
    chosen_investors = set(selected_investors)

    # Filter the investor-to-company mapping
    filtered_mapping = {}
    for investor, companies in investor_company_mapping.items():
        if investor not in chosen_investors:
            continue
        kept_companies = [company for company in companies if company in visible_companies]
        if kept_companies:
            filtered_mapping[investor] = kept_companies

    if not filtered_mapping:
        # Nothing to connect; avoid drawing an empty graph.
        return None

    # Use the filtered mapping to build the graph
    G = nx.Graph()
    for investor, companies in filtered_mapping.items():
        for company in companies:
            G.add_edge(investor, company)

    size_per_billion = 50  # company node area per $1B of valuation
    node_sizes = []
    node_colors = []
    for node in G.nodes:
        if node in filtered_mapping:
            node_sizes.append(2000)  # Fixed size for investors
            node_colors.append("#FF5733")  # Distinct color for investors
        else:
            valuation = valuation_by_company.get(node)
            node_sizes.append(100 if valuation is None else valuation * size_per_billion)
            node_colors.append("#33FF57")  # Distinct color for companies

    # Create the graph plot. The figure is closed in `finally` so it is
    # released even when layout, drawing or saving raises.
    fig = plt.figure(figsize=(18, 18))
    try:
        pos = nx.spring_layout(G, k=0.2, seed=42)  # Fixed seed for consistent layout
        nx.draw(
            G, pos,
            with_labels=True,
            node_size=node_sizes,
            node_color=node_colors,
            alpha=0.8,  # Slight transparency for Tufte-inspired visuals
            font_size=10,
            font_weight="bold",
            edge_color="#B0BEC5",  # Neutral, muted edge color
            width=0.8,  # Thin edges for minimal visual clutter
        )

        # Add a legend for node size (valuation); the empty scatter calls
        # only create legend handles at the sizes used for $1B and $100B.
        legend_entries = [(1 * size_per_billion, "$1B"), (100 * size_per_billion, "$100B")]
        for size, label in legend_entries:
            plt.scatter([], [], s=size, color="#33FF57", label=f"{label} valuation")
        plt.legend(scatterpoints=1, frameon=False, labelspacing=1.5, loc="lower left", fontsize=12)

        plt.title("Venture Funded Companies Visualization", fontsize=20)
        plt.axis('off')

        # Save plot to BytesIO object
        buf = BytesIO()
        fig.savefig(buf, format="png", bbox_inches="tight")
    finally:
        plt.close(fig)

    # Convert BytesIO to PIL image
    buf.seek(0)
    return Image.open(buf)

def app(selected_country, selected_industry):
    """Recompute the investor choices for the current country/industry filters.

    Returns a pair: a Gradio update that shows the investor checkbox group
    with every qualifying investor listed and pre-selected, and the filtered
    rows to be kept alongside it.
    """
    qualifying, matching_rows = filter_investors_by_country_and_industry(
        selected_country, selected_industry
    )

    # All qualifying investors are both offered and ticked by default.
    checkbox_update = gr.update(choices=qualifying, value=qualifying, visible=True)
    return checkbox_update, matching_rows

# Gradio Interface
def main():
    """Build the Gradio Blocks UI, wire its events, and launch it."""

    def choices_for(column):
        # "All" first, followed by the sorted distinct non-null values.
        return ["All"] + sorted(data[column].dropna().unique())

    country_list = choices_for("Country")
    industry_list = choices_for("Industry")

    with gr.Blocks() as demo:
        with gr.Row():
            country_filter = gr.Dropdown(choices=country_list, label="Filter by Country", value="All")
            industry_filter = gr.Dropdown(choices=industry_list, label="Filter by Industry", value="All")

        # Hidden until `app` returns an update that makes it visible.
        filtered_investor_list = gr.CheckboxGroup(choices=[], label="Select Investors", visible=False)
        graph_output = gr.Image(type="pil", label="Venture Network Graph")

        # Carries the filtered rows from `app` to `generate_graph`.
        filtered_data_holder = gr.State()

        # Changing either dropdown re-runs `app` with both current values.
        for dropdown in (country_filter, industry_filter):
            dropdown.change(
                app,
                inputs=[country_filter, industry_filter],
                outputs=[filtered_investor_list, filtered_data_holder],
            )

        # Any change to the investor selection redraws the graph.
        filtered_investor_list.change(
            generate_graph,
            inputs=[filtered_investor_list, filtered_data_holder],
            outputs=graph_output,
        )

    demo.launch()

# Build and launch the UI only when this file is run as a script, not on import.
if __name__ == "__main__":
    main()