PROBE

Sleeping

App Files Files Community

mgyigit committed on Nov 27, 2024

Commit

4166fb4

verified ·

1 Parent(s): 2014ab8

Update src/vis_utils.py

Browse files

Files changed (1) hide show

src/vis_utils.py +51 -57

src/vis_utils.py CHANGED Viewed

@@ -14,81 +14,77 @@ sys.path.append('..')
 sys.path.append('.')
 from about import *
-global data_component, filter_component
-def get_baseline_df(selected_methods, selected_metrics):
-    df = pd.read_csv(CSV_RESULT_PATH)
-    present_columns = ["method_name"] + selected_metrics
-    df = df[df['method_name'].isin(selected_methods)][present_columns]
-    return df
-def get_method_color(method):
-    return color_dict.get(method, 'black')  # If method is not in color_dict, use black
-def set_colors_and_marks_for_representation_groups(ax):
-    for label in ax.get_xticklabels():
-        text = label.get_text()
-        color = group_color_dict.get(text, 'black')  # Default to black if label not in dict
-        label.set_color(color)
-        label.set_fontweight('bold')
-        # Add a caret symbol to specific labels
-        if text in {'MUT2VEC', 'PFAM', 'GENE2VEC', 'BERT-PFAM'}:
-            label.set_text(f"^ {text}")
 def benchmark_plot(benchmark_type, methods_selected, x_metric, y_metric):
     if benchmark_type == 'similarity':
-        title = f"{x_metric} vs {y_metric}"
-        return plot_similarity_results(methods_selected, x_metric, y_metric, title)
     elif benchmark_type == 'function':
         return plot_function_results("./data/function_results.csv", x_metric, y_metric, methods_selected)
     elif benchmark_type == 'family':
         return plot_family_results("./data/family_results.csv", methods_selected, x_metric, save_path="./plot_images")
     elif benchmark_type == "affinity":
         return plot_affinity_results("./data/affinity_results.csv", methods_selected, x_metric, save_path="./plot_images")
-    else:
-        # Use general visualizer logic
-        return general_visualizer_plot(methods_selected, x_metric=x_metric, y_metric=y_metric)
-def general_visualizer(methods_selected, x_metric, y_metric):
-    df = pd.read_csv(CSV_RESULT_PATH)
-    filtered_df = df[df['method_name'].isin(methods_selected)]
-    # Create a Seaborn lineplot with method as hue
-    plt.figure(figsize=(10, 8))  # Increase figure size
-    sns.lineplot(
-        data=filtered_df,
-        x=x_metric,
-        y=y_metric,
-        hue="method_name",  # Different colors for different methods
-        marker="o",  # Add markers to the line plot
-    )
-    # Add labels and title
-    plt.xlabel(x_metric)
-    plt.ylabel(y_metric)
-    plt.title(f'{y_metric} vs {x_metric} for selected methods')
-    plt.grid(True)
-    # Save the plot to display it in Gradio
-    plot_path = "plot.png"
-    plt.savefig(plot_path)
-    plt.close()
-    return plot_path
-def plot_similarity_results(methods_selected, x_metric, y_metric, title):
-    df = pd.read_csv(CSV_RESULT_PATH)
-    # Filter the dataframe based on selected methods
-    filtered_df = df[df['method_name'].isin(methods_selected)]
-    def get_method_color(method):
-        return color_dict.get(method.upper(), 'black')
     # Add a new column to the dataframe for the color
     filtered_df['color'] = filtered_df['method_name'].apply(get_method_color)
     adjust_text_dict = {
         'expand_text': (1.15, 1.4), 'expand_points': (1.15, 1.25), 'expand_objects': (1.05, 1.5),
         'expand_align': (1.05, 1.2), 'autoalign': 'xy', 'va': 'center', 'ha': 'center',
@@ -104,7 +100,7 @@ def plot_similarity_results(methods_selected, x_metric, y_metric, title):
                                   label='method_name'))  # Label each point by the method name
          + p9.geom_point(size=3)  # Add points with no jitter, set point size
          + p9.geom_text(nudge_y=0.02, size=8)  # Add method names as labels, nudge slightly above the points
-         + p9.labs(title=title, x=f"{x_metric}", y=f"{y_metric}")  # Dynamic labels for X and Y axes
          + p9.scale_color_identity()  # Use colors directly from the dataframe
          + p9.theme(legend_position='none',
                     figure_size=(8, 8),  # Set figure size
@@ -114,10 +110,8 @@ def plot_similarity_results(methods_selected, x_metric, y_metric, title):
     )
     # Save the plot as an image
-    save_path = "./plot_images"  # Ensure this folder exists or adjust the path
-    os.makedirs(save_path, exist_ok=True)  # Create directory if it doesn't exist
     filename = os.path.join(save_path, title.replace(" ", "_") + "_Similarity_Scatter.png")
     g.save(filename=filename, dpi=400)
     return filename

 sys.path.append('.')
 from about import *
+from saving_utils import download_from_hub
+global data_component, filter_component
 def benchmark_plot(benchmark_type, methods_selected, x_metric, y_metric):
     if benchmark_type == 'similarity':
+        return plot_similarity_results(methods_selected, x_metric, y_metric)
     elif benchmark_type == 'function':
         return plot_function_results("./data/function_results.csv", x_metric, y_metric, methods_selected)
     elif benchmark_type == 'family':
         return plot_family_results("./data/family_results.csv", methods_selected, x_metric, save_path="./plot_images")
     elif benchmark_type == "affinity":
         return plot_affinity_results("./data/affinity_results.csv", methods_selected, x_metric, save_path="./plot_images")
+    return 0
def get_method_color(method):
    """Return the color registered for ``method`` in ``color_dict``.

    The lookup uses ``method`` exactly as given; methods without an entry
    get 'black'.
    """
    fallback_color = 'black'
    return color_dict.get(method, fallback_color)
def get_labels_and_title(x_metric, y_metric):
    """Build the plot title and axis labels for a pair of similarity metrics.

    Each metric name is expected to look like
    ``<dataset>_<category>_<measure>``, e.g. ``sparse_MF_correlation``:
    dataset is sparse/200/500, category is MF/BP/CC and measure is
    pvalue/correlation. Only the first three underscore-separated parts are
    used; any further parts are ignored.

    Args:
        x_metric: Metric name plotted on the x axis.
        y_metric: Metric name plotted on the y axis.

    Returns:
        A ``(title, x_label, y_label)`` tuple of strings. The title is the
        long form of the category when both metrics share it, otherwise
        ``"<x long form> vs <y long form>"``. A category without a known
        long form is shown as its raw code.

    Raises:
        ValueError: If a metric name has fewer than three
            underscore-separated parts.
    """
    # Long forms of the category codes.
    long_form_mapping = {
        "MF": "Molecular Function",
        "BP": "Biological Process",
        "CC": "Cellular Component",
    }

    def parse_metric(metric):
        """Split a metric name into (dataset, category, measure)."""
        parts = metric.split("_")
        if len(parts) < 3:
            # Fail with a readable message instead of a bare IndexError.
            raise ValueError(
                f"Metric name {metric!r} must look like "
                "'<dataset>_<category>_<measure>'"
            )
        dataset, category, measure = parts[:3]
        return dataset, category, measure

    def long_form(category):
        """Return the long form of a category code, or the code itself."""
        return long_form_mapping.get(category, category)

    x_dataset, x_category, x_measure = parse_metric(x_metric)
    y_dataset, y_category, y_measure = parse_metric(y_metric)

    # A shared category needs only one name in the title.
    if x_category == y_category:
        title = long_form(x_category)
    else:
        title = f"{long_form(x_category)} vs {long_form(y_category)}"

    x_label = f"{x_measure.capitalize()} on {x_dataset.capitalize()} Dataset"
    y_label = f"{y_measure.capitalize()} on {y_dataset.capitalize()} Dataset"
    return title, x_label, y_label


# plot_similarity_results calls this helper as generate_labels_and_title;
# keep that name bound so the call resolves instead of raising NameError.
generate_labels_and_title = get_labels_and_title
+def plot_similarity_results(methods_selected, x_metric, y_metric, similarity_path="/tmp/similarity_results.csv"):
+    if not os.path.exists(similarity_path):
+        benchmark_types = ["similarity", "function", "family", "affinity"] #download all files for faster results later
+        download_from_hub(benchmark_types)
+    similarity_df = pd.read_csv(similarity_path)
+    # Filter the dataframe based on selected methods
+    filtered_df = similarity_df[similarity_df['method_name'].isin(methods_selected)]
     # Add a new column to the dataframe for the color
     filtered_df['color'] = filtered_df['method_name'].apply(get_method_color)
+    title, x_label, y_label = generate_labels_and_title(x_metric, y_metric)
     adjust_text_dict = {
         'expand_text': (1.15, 1.4), 'expand_points': (1.15, 1.25), 'expand_objects': (1.05, 1.5),
         'expand_align': (1.05, 1.2), 'autoalign': 'xy', 'va': 'center', 'ha': 'center',
                                   label='method_name'))  # Label each point by the method name
          + p9.geom_point(size=3)  # Add points with no jitter, set point size
          + p9.geom_text(nudge_y=0.02, size=8)  # Add method names as labels, nudge slightly above the points
+         + p9.labs(title=title, x=x_label, y=y_label)  # Dynamic labels for X and Y axes
          + p9.scale_color_identity()  # Use colors directly from the dataframe
          + p9.theme(legend_position='none',
                     figure_size=(8, 8),  # Set figure size
     )
     # Save the plot as an image
+    save_path = "/tmp"
     filename = os.path.join(save_path, title.replace(" ", "_") + "_Similarity_Scatter.png")
     g.save(filename=filename, dpi=400)
     return filename