Spaces:

LeonceNsh
/

networkx-saas

Sleeping

App Files Files Community

LeonceNsh committed on Nov 29, 2024

Commit

75a7b9a

verified ·

1 Parent(s): 9deed8b

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -223

app.py CHANGED Viewed

@@ -1,207 +1,3 @@
-import pandas as pd
-import networkx as nx
-import plotly.graph_objects as go
-from io import BytesIO
-from PIL import Image
-import gradio as gr
-import logging
-# Set up logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-# Load and preprocess the dataset
-file_path = "cbinsights_data.csv"  # Replace with your actual file path
-try:
-    data = pd.read_csv(file_path, skiprows=1)
-    logger.info("CSV file loaded successfully.")
-except FileNotFoundError:
-    logger.error(f"File not found: {file_path}")
-    raise
-except Exception as e:
-    logger.error(f"Error loading CSV file: {e}")
-    raise
-# Standardize column names: strip whitespace and convert to lowercase
-data.columns = data.columns.str.strip().str.lower()
-logger.info(f"Standardized Column Names: {data.columns.tolist()}")
-# Identify the valuation column dynamically
-valuation_columns = [col for col in data.columns if 'valuation' in col.lower()]
-if not valuation_columns:
-    logger.error("No column containing 'Valuation' found in the dataset.")
-    raise ValueError("Data Error: Unable to find the valuation column. Please check your CSV file.")
-elif len(valuation_columns) > 1:
-    logger.error("Multiple columns containing 'Valuation' found in the dataset.")
-    raise ValueError("Data Error: Multiple valuation columns detected. Please ensure only one valuation column exists.")
-else:
-    valuation_column = valuation_columns[0]
-    logger.info(f"Using valuation column: {valuation_column}")
-# Clean and prepare data
-data["valuation_billions"] = data[valuation_column].replace({'\$': '', ',': ''}, regex=True)
-data["valuation_billions"] = pd.to_numeric(data["valuation_billions"], errors='coerce')
-logger.info("Valuation data cleaned and converted to numeric.")
-# Strip whitespace from all string columns
-data = data.apply(lambda col: col.str.strip() if col.dtype == "object" else col)
-logger.info("Whitespace stripped from all string columns.")
-# Rename columns for consistency
-expected_columns = {
-    "company": "Company",
-    "valuation_billions": "Valuation_Billions",
-    "date_joined": "Date_Joined",
-    "country": "Country",
-    "city": "City",
-    "industry": "Industry",
-    "select_investors": "Select_Investors"
-}
-missing_columns = set(expected_columns.keys()) - set(data.columns)
-if missing_columns:
-    logger.error(f"Missing columns in the dataset: {missing_columns}")
-    raise ValueError(f"Data Error: Missing columns {missing_columns} in the dataset.")
-data = data.rename(columns=expected_columns)
-logger.info("Columns renamed for consistency.")
-# Parse the "Select_Investors" column to map investors to companies
-def build_investor_company_mapping(df):
-    mapping = {}
-    for _, row in df.iterrows():
-        company = row["Company"]
-        investors = row["Select_Investors"]
-        if pd.notnull(investors):
-            for investor in investors.split(","):
-                investor = investor.strip()
-                if investor:  # Ensure investor is not an empty string
-                    mapping.setdefault(investor, []).append(company)
-    return mapping
-investor_company_mapping = build_investor_company_mapping(data)
-logger.info("Investor to company mapping created.")
-# Function to filter investors based on selected country and industry
-def filter_investors_by_country_and_industry(selected_country, selected_industry):
-    filtered_data = data.copy()
-    logger.info(f"Filtering data for Country: {selected_country}, Industry: {selected_industry}")
-    if selected_country != "All":
-        filtered_data = filtered_data[filtered_data["Country"] == selected_country]
-        logger.info(f"Data filtered by country: {selected_country}. Remaining records: {len(filtered_data)}")
-    if selected_industry != "All":
-        filtered_data = filtered_data[filtered_data["Industry"] == selected_industry]
-        logger.info(f"Data filtered by industry: {selected_industry}. Remaining records: {len(filtered_data)}")
-    investor_company_mapping_filtered = build_investor_company_mapping(filtered_data)
-    # Calculate total valuation per investor
-    investor_valuations = {}
-    for investor, companies in investor_company_mapping_filtered.items():
-        total_valuation = filtered_data[filtered_data["Company"].isin(companies)]["Valuation_Billions"].sum()
-        if total_valuation >= 20:  # Investors with >= 20B total valuation
-            investor_valuations[investor] = total_valuation
-    logger.info(f"Filtered investors with total valuation >= 20B: {len(investor_valuations)}")
-    return list(investor_valuations.keys()), filtered_data
-# Function to generate the Plotly graph
-def generate_graph(selected_investors, filtered_data):
-    if not selected_investors:
-        logger.warning("No investors selected. Returning empty figure.")
-        return go.Figure()
-    investor_company_mapping_filtered = build_investor_company_mapping(filtered_data)
-    filtered_mapping = {inv: investor_company_mapping_filtered[inv] for inv in selected_investors if inv in investor_company_mapping_filtered}
-    logger.info(f"Generating graph for {len(filtered_mapping)} investors.")
-    # Build the graph
-    G = nx.Graph()
-    for investor, companies in filtered_mapping.items():
-        for company in companies:
-            G.add_edge(investor, company)
-    # Generate positions using spring layout
-    pos = nx.spring_layout(G, k=0.2, seed=42)
-    # Prepare Plotly traces
-    edge_x = []
-    edge_y = []
-    for edge in G.edges():
-        x0, y0 = pos[edge[0]]
-        x1, y1 = pos[edge[1]]
-        edge_x += [x0, x1, None]
-        edge_y += [y0, y1, None]
-    edge_trace = go.Scatter(
-        x=edge_x, y=edge_y,
-        line=dict(width=0.5, color='#888'),
-        hoverinfo='none',
-        mode='lines'
-    )
-    node_x = []
-    node_y = []
-    node_text = []
-    node_size = []
-    node_color = []
-    customdata = []
-    for node in G.nodes():
-        x, y = pos[node]
-        node_x.append(x)
-        node_y.append(y)
-        if node in filtered_mapping:
-            node_text.append(f"Investor: {node}")
-            node_size.append(20)  # Investors have larger size
-            node_color.append('orange')
-            customdata.append(None)  # Investors do not have a single valuation
-        else:
-            valuation = filtered_data.loc[filtered_data["Company"] == node, "Valuation_Billions"].sum()
-            node_text.append(f"Company: {node}<br>Valuation: ${valuation}B")
-            node_size.append(10 + (valuation / filtered_data["Valuation_Billions"].max()) * 30 if filtered_data["Valuation_Billions"].max() else 10)
-            node_color.append('green')
-            customdata.append(f"${valuation}B")
-    node_trace = go.Scatter(
-        x=node_x, y=node_y,
-        mode='markers',
-        hoverinfo='text',
-        text=node_text,
-        customdata=customdata,
-        marker=dict(
-            showscale=False,
-            colorscale='YlGnBu',
-            color=node_color,
-            size=node_size,
-            line_width=2
-        )
-    )
-    fig = go.Figure(data=[edge_trace, node_trace],
-             layout=go.Layout(
-                title='Venture Network Visualization',
-                titlefont_size=16,
-                showlegend=False,
-                hovermode='closest',
-                margin=dict(b=20,l=5,r=5,t=40),
-                annotations=[ dict(
-                    text="",
-                    showarrow=False,
-                    xref="paper", yref="paper") ],
-                xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
-                yaxis=dict(showgrid=False, zeroline=False, showticklabels=False))
-                )
-    fig.update_traces(marker=dict(line=dict(width=0.5, color='white')), selector=dict(mode='markers'))
-    logger.info("Plotly graph generated successfully.")
-    return fig
 # Gradio app function to update CheckboxGroup and filtered data
 def app(selected_country, selected_industry):
     investor_list, filtered_data = filter_investors_by_country_and_industry(selected_country, selected_industry)
@@ -219,14 +15,18 @@ def main():
     country_list = ["All"] + sorted(data["Country"].dropna().unique())
     industry_list = ["All"] + sorted(data["Industry"].dropna().unique())
     logger.info(f"Available countries: {country_list}")
     logger.info(f"Available industries: {industry_list}")
     with gr.Blocks() as demo:
         with gr.Row():
-            # Set default value to "US" for country and "Enterprise Tech" for industry
-            country_filter = gr.Dropdown(choices=country_list, label="Filter by Country", value="US")
-            industry_filter = gr.Dropdown(choices=industry_list, label="Filter by Industry", value="Enterprise Tech")
         filtered_investor_list = gr.CheckboxGroup(choices=[], label="Select Investors", visible=False)
         graph_output = gr.Plot(label="Venture Network Graph")
@@ -254,26 +54,17 @@ def main():
         )
         # Handle plot click to display valuation
-        def display_valuation(plotly_click):
-            if plotly_click is None:
                 return "Click on a company node to see its valuation."
-            point = plotly_click
-            if 'text' in point and point['text']:
-                text = point['text']
-                if "Company:" in text:
-                    # Extract valuation
-                    parts = text.split("<br>")
-                    company_part = parts[0]
-                    valuation_part = parts[1]
-                    company = company_part.replace("Company: ", "")
-                    valuation = valuation_part.replace("Valuation: ", "")
-                    return f"**{company}** has a valuation of **{valuation}**."
             return "Click on a company node to see its valuation."
-        graph_output.event(
-            "plotly_click",
             fn=display_valuation,
-            inputs=graph_output,
             outputs=valuation_display
         )

 # Gradio app function to update CheckboxGroup and filtered data
 def app(selected_country, selected_industry):
     investor_list, filtered_data = filter_investors_by_country_and_industry(selected_country, selected_industry)
     country_list = ["All"] + sorted(data["Country"].dropna().unique())
     industry_list = ["All"] + sorted(data["Industry"].dropna().unique())
+    # Ensure the default values for dropdowns exist
+    default_country = "United States" if "United States" in country_list else "All"
+    default_industry = "Enterprise Tech" if "Enterprise Tech" in industry_list else "All"
     logger.info(f"Available countries: {country_list}")
     logger.info(f"Available industries: {industry_list}")
     with gr.Blocks() as demo:
         with gr.Row():
+            # Set default value for country and industry dropdowns
+            country_filter = gr.Dropdown(choices=country_list, label="Filter by Country", value=default_country)
+            industry_filter = gr.Dropdown(choices=industry_list, label="Filter by Industry", value=default_industry)
         filtered_investor_list = gr.CheckboxGroup(choices=[], label="Select Investors", visible=False)
         graph_output = gr.Plot(label="Venture Network Graph")
         )
         # Handle plot click to display valuation
+        def display_valuation(plotly_event):
+            if not plotly_event or "points" not in plotly_event or not plotly_event["points"]:
                 return "Click on a company node to see its valuation."
+            point_data = plotly_event["points"][0]
+            if "customdata" in point_data and point_data["customdata"]:
+                return f"**Valuation:** {point_data['customdata']}"
             return "Click on a company node to see its valuation."
+        graph_output.events().on_click(
             fn=display_valuation,
+            inputs=[graph_output],
             outputs=valuation_display
         )