hichem-abdellali committed • Commit adaef8a
1 Parent(s): 2247036
update the user friendly metrics to logs into w&b (#4)
- user-friendly-metrics.py +139 -24
user-friendly-metrics.py CHANGED
@@ -12,16 +12,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import evaluate
-import datasets
-import motmetrics as mm
-from motmetrics.metrics import (events_to_df_map,
-                                obj_frequencies,
-                                track_ratios)
-import numpy as np
+import datetime
+import os
 
+import datasets
+import evaluate
 from seametrics.user_friendly.utils import calculate_from_payload
 
+import wandb
+
 _CITATION = """\
 @InProceedings{huggingface:module,
 title = {A great new module},
@@ -70,17 +69,19 @@ class UserFriendlyMetrics(evaluate.Metric):
             citation=_CITATION,
             inputs_description=_KWARGS_DESCRIPTION,
             # This defines the format of each prediction and reference
-            features=datasets.Features(
-                {
-                    "predictions": datasets.Sequence(
-                        datasets.Sequence(datasets.Value("float"))),
-                    "references": datasets.Sequence(
-                        datasets.Sequence(datasets.Value("float"))),
-                }
-            ),
+            features=datasets.Features(
+                {
+                    "predictions": datasets.Sequence(
+                        datasets.Sequence(datasets.Value("float"))
+                    ),
+                    "references": datasets.Sequence(
+                        datasets.Sequence(datasets.Value("float"))
+                    ),
+                }
+            ),
             # Additional links to the codebase or references
             codebase_urls=["http://github.com/path/to/codebase/of/new_module"],
-            reference_urls=["http://path.to.reference.url/new_module"]
+            reference_urls=["http://path.to.reference.url/new_module"],
         )
 
     def _download_and_prepare(self, dl_manager):
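Note on the features block above: each of predictions and references is typed as a sequence of float sequences, i.e. one list of floats per object. A minimal sketch of an input that satisfies this schema (the box encoding and values are illustrative assumptions, not taken from this repo):

import datasets

# Same schema as declared in _info() above.
features = datasets.Features(
    {
        "predictions": datasets.Sequence(datasets.Sequence(datasets.Value("float"))),
        "references": datasets.Sequence(datasets.Sequence(datasets.Value("float"))),
    }
)

# Hypothetical example: two predicted boxes and one reference box,
# each encoded as a flat list of floats.
example = {
    "predictions": [[10.0, 20.0, 30.0, 40.0], [15.0, 25.0, 35.0, 45.0]],
    "references": [[12.0, 22.0, 32.0, 42.0]],
}

encoded = features.encode_example(example)  # fails if the nesting is wrong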
@@ -88,14 +89,128 @@ class UserFriendlyMetrics(evaluate.Metric):
         # TODO: Download external resources if needed
         pass
 
-    def _compute(
-        self, payload,
-        max_iou: float = 0.5,
-        filters={},
-        recognition_thresholds=[0.3, 0.5, 0.8],
-        debug: bool = False):
+    def _compute(
+        self,
+        payload,
+        max_iou: float = 0.5,
+        filters={},
+        recognition_thresholds=[0.3, 0.5, 0.8],
+        debug: bool = False,
+    ):
         """Returns the scores"""
         # TODO: Compute the different scores of the module
-        return calculate_from_payload(
-            payload, max_iou, filters, recognition_thresholds, debug)
+        return calculate_from_payload(
+            payload, max_iou, filters, recognition_thresholds, debug
+        )
+        # return calculate(predictions, references, max_iou)
+
+    def wandb(
+        self,
+        results,
+        wandb_section: str = None,
+        wandb_project="user_friendly_metrics",
+        log_plots: bool = True,
+        debug: bool = False,
+    ):
+        """
+        Logs metrics to Weights and Biases (wandb) for tracking and visualization, including categorized bar charts for global metrics.
+
+        Args:
+            results (dict): Results dictionary with 'global' and 'per_sequence' keys.
+            wandb_section (str, optional): W&B section for metric grouping. Defaults to None.
+            wandb_project (str, optional): The name of the wandb project. Defaults to 'user_friendly_metrics'.
+            log_plots (bool, optional): Generates categorized bar charts for global metrics. Defaults to True.
+            debug (bool, optional): Logs detailed summaries and histories to the terminal console. Defaults to False.
+        """
+
+        current_datetime = datetime.datetime.now()
+        formatted_datetime = current_datetime.strftime("%Y-%m-%d_%H-%M-%S")
+        wandb.login(key=os.getenv("WANDB_API_KEY"))
+
+        run = wandb.init(
+            project=wandb_project,
+            name=f"evaluation-{formatted_datetime}",
+            reinit=True,
+            settings=wandb.Settings(silent=not debug),
+        )
+
+        categories = {
+            "confusion_metrics": {"fp", "tp", "fn"},
+            "evaluation_metrics": {"f1", "recall", "precision"},
+            "recognition_metrics": {
+                "recognition_0.3",
+                "recognition_0.5",
+                "recognition_0.8",
+                "recognized_0.3",
+                "recognized_0.5",
+                "recognized_0.8",
+            },
+        }
+
+        chart_data = {key: [] for key in categories.keys()}
+
+        # Log global metrics
+        if "global" in results:
+            for global_key, global_metrics in results["global"].items():
+                for metric, value in global_metrics["all"].items():
+                    log_key = (
+                        f"{wandb_section}/global/{global_key}/{metric}"
+                        if wandb_section
+                        else f"global/{global_key}/{metric}"
+                    )
+                    run.log({log_key: value})
+
+                    if debug:
+                        print(f"Logged to W&B: {log_key} = {value}")
+
+                    for category, metrics in categories.items():
+                        if metric in metrics:
+                            chart_data[category].append([metric, value])
+
+        if log_plots:
+            for category, data in chart_data.items():
+                if data:
+                    table_data = [[label, value] for label, value in data]
+                    table = wandb.Table(data=table_data, columns=["metrics", "value"])
+                    run.log(
+                        {
+                            f"{category}_bar_chart": wandb.plot.bar(
+                                table,
+                                "metrics",
+                                "value",
+                                title=f"{category.replace('_', ' ').title()}",
+                            )
+                        }
+                    )
+
+        if "per_sequence" in results:
+            sorted_sequences = sorted(
+                results["per_sequence"].items(),
+                key=lambda x: x[1]
+                .get("evaluation_metrics", {})
+                .get("f1", {})
+                .get("all", 0),
+                reverse=True,
+            )
+
+            for sequence_name, sequence_data in sorted_sequences:
+                for seq_key, seq_metrics in sequence_data.items():
+                    for metric, value in seq_metrics["all"].items():
+                        log_key = (
+                            f"{wandb_section}/per_sequence/{sequence_name}/{seq_key}/{metric}"
+                            if wandb_section
+                            else f"per_sequence/{sequence_name}/{seq_key}/{metric}"
+                        )
+                        run.log({log_key: value})
+                        if debug:
+                            print(
+                                f"Logged to W&B: {sequence_name} -> {log_key} = {value}"
+                            )
+
+        if debug:
+            print("\nDebug Mode: Logging Summary and History")
+            print(f"Results Summary:\n{results}")
+            print(f"WandB Settings:\n{run.settings}")
+            print("All metrics have been logged.")
 
+        run.finish()
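For context, a minimal end-to-end sketch of how the new wandb() method might be called. Everything below is an assumption rather than documented behavior of this space: the evaluate.load path is a placeholder, and the results layout is inferred from how wandb() indexes it (results["global"][key]["all"] and results["per_sequence"][name][key]["all"] map metric names to values):

import evaluate

# Placeholder module path; the actual space path is an assumption.
module = evaluate.load("path/to/user-friendly-metrics")

# Shape inferred from how wandb() reads `results`; values are illustrative.
results = {
    "global": {
        "model": {
            "all": {"tp": 10, "fp": 2, "fn": 3, "precision": 0.83, "recall": 0.77, "f1": 0.80}
        },
    },
    "per_sequence": {
        "seq_01": {"evaluation_metrics": {"all": {"f1": 0.75}}},
    },
}

# Requires WANDB_API_KEY in the environment (wandb.login reads it). Creates a
# run named evaluation-<timestamp> and logs keys such as
# eval/global/model/f1 and eval/per_sequence/seq_01/evaluation_metrics/f1.
module.wandb(results, wandb_section="eval", log_plots=True, debug=True)

Design note: because wandb() buckets metric names into confusion_metrics, evaluation_metrics, and recognition_metrics, only metrics with those exact names (fp, tp, fn, f1, recall, precision, recognition_0.3 through recognized_0.8) appear in the bar charts; any other metric is still logged as a plain scalar.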