Spaces:

LeonceNsh
/

networkx-saas

Sleeping

App Files Files Community

LeonceNsh committed on Nov 28, 2024

Commit

b951dc3

verified ·

1 Parent(s): cc9514f

Update app.py

Browse files

Files changed (1) hide show

app.py +74 -26

app.py CHANGED Viewed

@@ -4,31 +4,52 @@ import matplotlib.pyplot as plt
 from io import BytesIO
 from PIL import Image
 import gradio as gr
 # Load and preprocess the dataset
-file_path = "cbinsights_data.csv"  # Replace with your file path
-data = pd.read_csv(file_path, skiprows=1)
 # Standardize column names: strip whitespace and convert to lowercase
 data.columns = data.columns.str.strip().str.lower()
-print("Standardized Column Names:", data.columns.tolist())
 # Identify the valuation column dynamically
 valuation_columns = [col for col in data.columns if 'valuation' in col.lower()]
 if not valuation_columns:
-    raise ValueError("No column containing 'Valuation' found in the dataset.")
 elif len(valuation_columns) > 1:
-    raise ValueError("Multiple columns containing 'Valuation' found. Please specify.")
 else:
     valuation_column = valuation_columns[0]
 # Clean and prepare data
 data["valuation_billions"] = data[valuation_column].replace({'\$': '', ',': ''}, regex=True)
 data["valuation_billions"] = pd.to_numeric(data["valuation_billions"], errors='coerce')
-data = data.applymap(lambda x: x.strip() if isinstance(x, str) else x)
-# Rename columns for consistency (optional)
-data = data.rename(columns={
     "company": "Company",
     "valuation_billions": "Valuation_Billions",
     "date_joined": "Date_Joined",
@@ -36,7 +57,15 @@ data = data.rename(columns={
     "city": "City",
     "industry": "Industry",
     "select_investors": "Select_Investors"
-})
 # Parse the "Select_Investors" column to map investors to companies
 def build_investor_company_mapping(df):
@@ -47,18 +76,24 @@ def build_investor_company_mapping(df):
         if pd.notnull(investors):
             for investor in investors.split(","):
                 investor = investor.strip()
-                mapping.setdefault(investor, []).append(company)
     return mapping
 investor_company_mapping = build_investor_company_mapping(data)
 # Function to filter investors based on selected country and industry
 def filter_investors_by_country_and_industry(selected_country, selected_industry):
     filtered_data = data.copy()
     if selected_country != "All":
         filtered_data = filtered_data[filtered_data["Country"] == selected_country]
     if selected_industry != "All":
         filtered_data = filtered_data[filtered_data["Industry"] == selected_industry]
     investor_company_mapping_filtered = build_investor_company_mapping(filtered_data)
@@ -69,22 +104,27 @@ def filter_investors_by_country_and_industry(selected_country, selected_industry
         if total_valuation >= 20:  # Investors with >= 20B total valuation
             investor_valuations[investor] = total_valuation
     return list(investor_valuations.keys()), filtered_data
 # Function to generate the graph
 def generate_graph(selected_investors, filtered_data):
     if not selected_investors:
         return None
     investor_company_mapping_filtered = build_investor_company_mapping(filtered_data)
-    filtered_mapping = {inv: investor_company_mapping_filtered[inv] for inv in selected_investors}
     # Build the graph
     G = nx.Graph()
     for investor, companies in filtered_mapping.items():
         for company in companies:
             G.add_edge(investor, company)
     # Node size based on valuation
     max_valuation = filtered_data["Valuation_Billions"].max()
     node_sizes = []
@@ -95,10 +135,10 @@ def generate_graph(selected_investors, filtered_data):
             valuation = filtered_data.loc[filtered_data["Company"] == node, "Valuation_Billions"].sum()
             size = (valuation / max_valuation) * 1500 if max_valuation else 100
             node_sizes.append(size)
     # Node color: Investors (orange), Companies (green)
     node_colors = ["#FF8C00" if node in filtered_mapping else "#32CD32" for node in G.nodes]
     # Draw the graph
     plt.figure(figsize=(15, 15))
     pos = nx.spring_layout(G, k=0.2, seed=42)
@@ -111,7 +151,7 @@ def generate_graph(selected_investors, filtered_data):
         edge_color="#A9A9A9",  # Light gray edges
         alpha=0.9
     )
     # Legend
     from matplotlib.lines import Line2D
     legend_elements = [
@@ -119,22 +159,27 @@ def generate_graph(selected_investors, filtered_data):
         Line2D([0], [0], marker='o', color='w', label='Company', markersize=10, markerfacecolor='#32CD32')
     ]
     plt.legend(handles=legend_elements, loc='upper left')
     plt.title("Venture Network Visualization", fontsize=20)
     plt.axis("off")
     # Save plot to BytesIO
     buf = BytesIO()
     plt.savefig(buf, format="png", bbox_inches="tight")
     plt.close()
     buf.seek(0)
     return Image.open(buf)
 # Gradio app function
 def app(selected_country, selected_industry):
     investor_list, filtered_data = filter_investors_by_country_and_industry(selected_country, selected_industry)
-    return gr.CheckboxGroup.update(
         choices=investor_list,
         value=investor_list,
         visible=True
@@ -144,17 +189,20 @@ def app(selected_country, selected_industry):
 def main():
     country_list = ["All"] + sorted(data["Country"].dropna().unique())
     industry_list = ["All"] + sorted(data["Industry"].dropna().unique())
     with gr.Blocks() as demo:
         with gr.Row():
             country_filter = gr.Dropdown(choices=country_list, label="Filter by Country", value="All")
             industry_filter = gr.Dropdown(choices=industry_list, label="Filter by Industry", value="All")
         filtered_investor_list = gr.CheckboxGroup(choices=[], label="Select Investors", visible=False)
         graph_output = gr.Image(type="pil", label="Venture Network Graph")
         filtered_data_holder = gr.State()
         country_filter.change(
             app,
             inputs=[country_filter, industry_filter],
@@ -165,13 +213,13 @@ def main():
             inputs=[country_filter, industry_filter],
             outputs=[filtered_investor_list, filtered_data_holder]
         )
         filtered_investor_list.change(
             generate_graph,
             inputs=[filtered_investor_list, filtered_data_holder],
             outputs=graph_output
         )
     demo.launch()
 if __name__ == "__main__":

 from io import BytesIO
 from PIL import Image
 import gradio as gr
+import logging
+# Set up logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
 # Load and preprocess the dataset
+file_path = "cbinsights_data.csv"  # Replace with your actual file path
+try:
+    data = pd.read_csv(file_path)
+    logger.info("CSV file loaded successfully.")
+except FileNotFoundError:
+    logger.error(f"File not found: {file_path}")
+    raise
+except Exception as e:
+    logger.error(f"Error loading CSV file: {e}")
+    raise
 # Standardize column names: strip whitespace and convert to lowercase
 data.columns = data.columns.str.strip().str.lower()
+logger.info(f"Standardized Column Names: {data.columns.tolist()}")
 # Identify the valuation column dynamically
 valuation_columns = [col for col in data.columns if 'valuation' in col.lower()]
 if not valuation_columns:
+    logger.error("No column containing 'Valuation' found in the dataset.")
+    raise ValueError("Data Error: Unable to find the valuation column. Please check your CSV file.")
 elif len(valuation_columns) > 1:
+    logger.error("Multiple columns containing 'Valuation' found in the dataset.")
+    raise ValueError("Data Error: Multiple valuation columns detected. Please ensure only one valuation column exists.")
 else:
     valuation_column = valuation_columns[0]
+    logger.info(f"Using valuation column: {valuation_column}")
 # Clean and prepare data
 data["valuation_billions"] = data[valuation_column].replace({'\$': '', ',': ''}, regex=True)
 data["valuation_billions"] = pd.to_numeric(data["valuation_billions"], errors='coerce')
+logger.info("Valuation data cleaned and converted to numeric.")
+# Strip whitespace from all string columns
+data = data.apply(lambda col: col.str.strip() if col.dtype == "object" else col)
+logger.info("Whitespace stripped from all string columns.")
+# Rename columns for consistency
+expected_columns = {
     "company": "Company",
     "valuation_billions": "Valuation_Billions",
     "date_joined": "Date_Joined",
     "city": "City",
     "industry": "Industry",
     "select_investors": "Select_Investors"
+}
+missing_columns = set(expected_columns.keys()) - set(data.columns)
+if missing_columns:
+    logger.error(f"Missing columns in the dataset: {missing_columns}")
+    raise ValueError(f"Data Error: Missing columns {missing_columns} in the dataset.")
+data = data.rename(columns=expected_columns)
+logger.info("Columns renamed for consistency.")
 # Parse the "Select_Investors" column to map investors to companies
 def build_investor_company_mapping(df):
         if pd.notnull(investors):
             for investor in investors.split(","):
                 investor = investor.strip()
+                if investor:  # Ensure investor is not an empty string
+                    mapping.setdefault(investor, []).append(company)
     return mapping
 investor_company_mapping = build_investor_company_mapping(data)
+logger.info("Investor to company mapping created.")
 # Function to filter investors based on selected country and industry
 def filter_investors_by_country_and_industry(selected_country, selected_industry):
     filtered_data = data.copy()
+    logger.info(f"Filtering data for Country: {selected_country}, Industry: {selected_industry}")
     if selected_country != "All":
         filtered_data = filtered_data[filtered_data["Country"] == selected_country]
+        logger.info(f"Data filtered by country: {selected_country}. Remaining records: {len(filtered_data)}")
     if selected_industry != "All":
         filtered_data = filtered_data[filtered_data["Industry"] == selected_industry]
+        logger.info(f"Data filtered by industry: {selected_industry}. Remaining records: {len(filtered_data)}")
     investor_company_mapping_filtered = build_investor_company_mapping(filtered_data)
         if total_valuation >= 20:  # Investors with >= 20B total valuation
             investor_valuations[investor] = total_valuation
+    logger.info(f"Filtered investors with total valuation >= 20B: {len(investor_valuations)}")
     return list(investor_valuations.keys()), filtered_data
 # Function to generate the graph
 def generate_graph(selected_investors, filtered_data):
     if not selected_investors:
+        logger.warning("No investors selected. Returning None for graph.")
         return None
     investor_company_mapping_filtered = build_investor_company_mapping(filtered_data)
+    filtered_mapping = {inv: investor_company_mapping_filtered[inv] for inv in selected_investors if inv in investor_company_mapping_filtered}
+    logger.info(f"Generating graph for {len(filtered_mapping)} investors.")
     # Build the graph
     G = nx.Graph()
     for investor, companies in filtered_mapping.items():
         for company in companies:
             G.add_edge(investor, company)
     # Node size based on valuation
     max_valuation = filtered_data["Valuation_Billions"].max()
     node_sizes = []
             valuation = filtered_data.loc[filtered_data["Company"] == node, "Valuation_Billions"].sum()
             size = (valuation / max_valuation) * 1500 if max_valuation else 100
             node_sizes.append(size)
     # Node color: Investors (orange), Companies (green)
     node_colors = ["#FF8C00" if node in filtered_mapping else "#32CD32" for node in G.nodes]
     # Draw the graph
     plt.figure(figsize=(15, 15))
     pos = nx.spring_layout(G, k=0.2, seed=42)
         edge_color="#A9A9A9",  # Light gray edges
         alpha=0.9
     )
     # Legend
     from matplotlib.lines import Line2D
     legend_elements = [
         Line2D([0], [0], marker='o', color='w', label='Company', markersize=10, markerfacecolor='#32CD32')
     ]
     plt.legend(handles=legend_elements, loc='upper left')
     plt.title("Venture Network Visualization", fontsize=20)
     plt.axis("off")
     # Save plot to BytesIO
     buf = BytesIO()
     plt.savefig(buf, format="png", bbox_inches="tight")
     plt.close()
     buf.seek(0)
+    logger.info("Graph generated successfully.")
     return Image.open(buf)
 # Gradio app function
 def app(selected_country, selected_industry):
     investor_list, filtered_data = filter_investors_by_country_and_industry(selected_country, selected_industry)
+    logger.info("Updating CheckboxGroup and filtered data holder.")
+    # Use gr.update() to create an update dictionary for the CheckboxGroup
+    return gr.update(
         choices=investor_list,
         value=investor_list,
         visible=True
 def main():
     country_list = ["All"] + sorted(data["Country"].dropna().unique())
     industry_list = ["All"] + sorted(data["Industry"].dropna().unique())
+    logger.info(f"Available countries: {country_list}")
+    logger.info(f"Available industries: {industry_list}")
     with gr.Blocks() as demo:
         with gr.Row():
             country_filter = gr.Dropdown(choices=country_list, label="Filter by Country", value="All")
             industry_filter = gr.Dropdown(choices=industry_list, label="Filter by Industry", value="All")
         filtered_investor_list = gr.CheckboxGroup(choices=[], label="Select Investors", visible=False)
         graph_output = gr.Image(type="pil", label="Venture Network Graph")
         filtered_data_holder = gr.State()
         country_filter.change(
             app,
             inputs=[country_filter, industry_filter],
             inputs=[country_filter, industry_filter],
             outputs=[filtered_investor_list, filtered_data_holder]
         )
         filtered_investor_list.change(
             generate_graph,
             inputs=[filtered_investor_list, filtered_data_holder],
             outputs=graph_output
         )
     demo.launch()
 if __name__ == "__main__":