arcan3 committed
Commit b37ce89 · Parent: 7a69981

adding the status and the score

Files changed (4):
  1. app.py +42 -15
  2. funcs/convertors.py +111 -0
  3. funcs/processor.py +5 -6
  4. funcs/tools.py +20 -0
app.py CHANGED
@@ -21,6 +21,46 @@ reducer10d.load('models/r10d_6.pth')
 cluster_som = ClusterSOM()
 cluster_som.load("models/cluster_som6.pkl")

+def score(self, data, midpoints=None, threshold_radius=4):
+    """
+    Compute the score for each sample in the data, based on the grid distance from the BMU node to the closest midpoint of the SOM grid.
+
+    :param data: The input data.
+    :param midpoints: A dictionary keyed by SOM index, whose values are lists of grid midpoints for the corresponding SOMs.
+    :param threshold_radius: The threshold radius used in the score calculation.
+    """
+    scores = []
+
+    for sample in data:
+        # Predict the cluster and the BMU SOM coordinate for the sample
+        result = self.predict([sample])[0]
+
+        # Only score samples that are not noise
+        if result[0] != -1:
+            # The activated SOM's index and its corresponding BMU
+            activated_som_index, bmu = result[0], result[1]
+
+            # Get the corresponding SOM for the data point
+            som = self.som_models[activated_som_index]
+
+            # If specific midpoints are provided for this SOM, use them; otherwise fall back to the center of the SOM grid
+            if midpoints is not None and activated_som_index in midpoints:
+                specified_midpoints = midpoints[activated_som_index]
+            else:
+                specified_midpoints = [tuple((dim - 1) / 2 for dim in som.get_weights().shape[:2])]
+
+            # Compute the grid distance from the BMU to each midpoint and take the minimum
+            min_distance = min(np.sqrt((midpoint[0] - bmu[0])**2 + (midpoint[1] - bmu[1])**2) for midpoint in specified_midpoints)
+
+            # The score is the minimum grid distance minus the threshold radius
+            score = min_distance - threshold_radius
+
+            scores.append(score)
+        else:
+            scores.append(None)  # Noise
+
+    return scores
+
 def map_som2animation(som_value):
     mapping = {
         2: 0,  # walk
@@ -33,19 +73,6 @@ def map_som2animation(som_value):

     return mapping.get(som_value, None)

-# def map_som2animation_v2(som_value):
-#     mapping = {
-#         versammelter_trab: center of SOM-1,
-#         arbeits-trab: south-east of SOM-1,
-#         mittels-trab: North of SOM-1,
-#         starker-trab: North-west of SOM-1,
-
-#         starker-schritt:
-
-#     }
-
-#     return mapping.get(som_value, None)
-
 def deviation_scores(tensor_data, scale=50):
     if len(tensor_data) < 5:
         raise ValueError("The input tensor must have at least 5 elements.")
@@ -141,13 +168,12 @@ def get_som_mp4_v2(csv_file_box, slice_size_slider, sample_rate, window_size_sli
     csv_writer.writerow(header)
     csv_writer.writerows(processed_data)

-    # os.system('curl -X POST -F "csv_file=@animation_table.csv" https://metric-space.ngrok.io/generate --output animation.mp4')
+    os.system('curl -X POST -F "csv_file=@animation_table.csv" https://metric-space.ngrok.io/generate --output animation.mp4')

     # prediction = cluster_som.predict(embedding10d)
     som_video = cluster.plot_activation(embedding10d)
     som_video.write_videofile('som_sequence.mp4')

-    # return processed_file_box, json_file_box, slices_per_leg, plot_box_leg, plot_box_overlay, slice_slider, plot_slice_leg, get_all_slice, slice_json_box, 'som_sequence.mp4', 'animation.mp4'
-    return processed_file_box, json_file_box, slices_per_leg, plot_box_leg, plot_box_overlay, slice_slider, plot_slice_leg, get_all_slice, slice_json_box, 'som_sequence.mp4', None
+    return processed_file_box, json_file_box, slices_per_leg, plot_box_leg, plot_box_overlay, slice_slider, plot_slice_leg, get_all_slice, slice_json_box, 'som_sequence.mp4', 'animation.mp4'

 # ml inference
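The new score() helper takes self, which suggests it is intended as a ClusterSOM method rather than a module-level function in app.py. A minimal usage sketch under that assumption, reusing the cluster_som instance loaded above; embedding10d stands for the 10-dimensional embeddings produced by reducer10d, and the status labels are inferred from the code, not from the commit:

    # Hypothetical usage; assumes score() is bound to the ClusterSOM instance.
    scores = cluster_som.score(embedding10d, threshold_radius=4)

    # None marks noise samples; a score <= 0 means the BMU landed within the
    # threshold radius of the nearest grid midpoint.
    statuses = [None if s is None else ("ok" if s <= 0 else "off-center") for s in scores]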
funcs/convertors.py CHANGED
@@ -3,6 +3,7 @@ from funcs.tools import upsample_signal
 from funcs.tools import process_signals
 from funcs.tools import numpy_to_native
 from funcs.plot_func import plot_slices
+from funcs.tools import upsample_signal_v2
 from funcs.tools import fill_missing_values

 import json
@@ -88,4 +89,109 @@ def slice_csv_to_json(input_file, slice_size=64, min_slice_size=16, sample_rate=
     if debug:
         plot_slices(original_data[gz_columns[0]], data[gz_columns[0]], precise_slice_points, precise_slice_points, sample_rate, data.index.values[0])

+    return 'output.json', len(slices)
+
+
+def slice_csv_to_json_v2(input_file, slice_size=64, min_slice_size=10, sample_rate=20):
+    data = pd.read_csv(input_file, delimiter=";", index_col=0)
+    original_data = data.copy()
+    data = fill_missing_values(data)
+    data.fillna(0, inplace=True)
+
+    gz_columns = [col for col in data.columns if col.startswith("GZ")]
+    all_peaks = []
+    upsample_factor = sample_rate
+    combined_smoothed_signals_upsampled = np.zeros(upsample_signal_v2(data[gz_columns[0]].values, upsample_factor).size, dtype=float)
+
+    process_signals_failed = False
+    for gz_col in gz_columns:
+        gz_signal = data[gz_col].values
+
+        # Any failure inside process_signals() falls back to fixed-size slicing
+        try:
+            upsampled_smoothed_signal, peaks = process_signals(gz_signal, upsample_factor)
+        except Exception:
+            process_signals_failed = True
+            break
+
+        # Skip empty or too-short signals
+        if upsampled_smoothed_signal is None or peaks is None:
+            continue
+
+        all_peaks.append(peaks)
+        combined_smoothed_signals_upsampled += upsampled_smoothed_signal
+
+    if process_signals_failed or not any(len(peaks) > 0 for peaks in all_peaks):
+        precise_loop_points = list(range(0, len(data), slice_size))
+    else:
+        precise_loop_points = [np.mean(peaks) for peaks in zip(*all_peaks)]
+
+    precise_slice_points = []
+    for i in range(len(precise_loop_points) - 1, -1, -1):
+        interval_end = int(precise_loop_points[i])
+        if i > 0:
+            interval_start = int(precise_loop_points[i - 1])
+        else:
+            interval_start = 0
+
+        max_value_index = np.argmax(combined_smoothed_signals_upsampled[interval_start:interval_end]) + interval_start
+        precise_slice_points.append(max_value_index)
+
+    precise_slice_points.reverse()
+
+    slices = []
+    start_index = 0
+    for i, precise_slice_point in enumerate(precise_slice_points):
+        end_index = round(precise_slice_point / upsample_factor)
+        if i == 0:
+            start_index = end_index
+            continue
+
+        if end_index - start_index >= min_slice_size:
+            if i == len(precise_slice_points) - 1 and end_index - start_index < slice_size:
+                break
+
+            slice_data = data.iloc[start_index:end_index].to_dict("list")
+
+            if i > 1 and not process_signals_failed:
+                precise_time_diff = precise_slice_point - precise_slice_points[i - 1]
+                precise_timestamp = slices[-1]["precise_timestamp"] + precise_time_diff
+
+                timestamp = data.index.values[(np.abs(data.index.values - precise_timestamp)).argmin()]

+                time_diff = timestamp - slices[-1]["timestamp"]
+
+                for j, gz_col in enumerate(gz_columns):
+                    slice_data[f"{gz_col}_precise_time_diff"] = all_peaks[j][i] - all_peaks[j][i - 1]
+            else:
+                precise_timestamp = data.index.values[start_index]
+                timestamp = precise_timestamp
+                time_diff = None
+                precise_time_diff = None
+
+                for j, gz_col in enumerate(gz_columns):
+                    slice_data[f"{gz_col}_precise_timestamp"] = all_peaks[j][0] if not process_signals_failed else None
+                    slice_data[f"{gz_col}_precise_time_diff"] = None
+
+            slice_data["precise_timestamp"] = precise_timestamp
+            slice_data["timestamp"] = timestamp
+            slice_data["time_diff"] = time_diff
+            slice_data["precise_time_diff"] = precise_time_diff
+
+            if end_index - start_index < slice_size:
+                pad_size = slice_size - (end_index - start_index)
+                for key in slice_data:
+                    if key not in ["timestamp", "time_diff", "precise_timestamp", "precise_time_diff"] and not key.endswith("_precise_timestamp") and not key.endswith("_precise_time_diff"):
+                        slice_data[key] = data[key].iloc[start_index - pad_size:start_index].tolist() + slice_data[key]
+            else:
+                for key in slice_data:
+                    if key not in ["timestamp", "time_diff", "precise_timestamp", "precise_time_diff"] and not key.endswith("_precise_timestamp") and not key.endswith("_precise_time_diff"):
+                        slice_data[key] = slice_data[key][:slice_size]
+            slices.append(slice_data)
+
+        start_index = end_index
+
+    with open('output.json', "w") as f:
+        json.dump(numpy_to_native(slices), f, indent=2)
+
     return 'output.json', len(slices)
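A minimal driver for the new slicer, assuming output.csv is the semicolon-delimited file written by process_data() in funcs/processor.py:

    from funcs.convertors import slice_csv_to_json_v2

    # Peak-aligned slicing; falls back to fixed-size windows if process_signals() fails.
    json_path, n_slices = slice_csv_to_json_v2('output.csv', slice_size=64, min_slice_size=10, sample_rate=20)
    print(f'{n_slices} slices written to {json_path}')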
funcs/processor.py CHANGED
@@ -2,11 +2,10 @@ import numpy as np
 import pandas as pd
 import gradio as gr

-from funcs.convertors import slice_csv_to_json
-from funcs.plot_func import plot_sensor_data_from_json, plot_overlay_data_from_json
-from funcs.ml_inference import get_som_mp4
+from funcs.convertors import slice_csv_to_json, slice_csv_to_json_v2
+from funcs.plot_func import plot_sensor_data_from_json

-def process_data(input_file, slice_size=64, min_slice_size=16, sample_rate=20, window_size=40, threshold=1000, span_limit=10000000):
+def process_data(input_file, slice_size=64, min_slice_size=10, sample_rate=20, window_size=40, threshold=1000, span_limit=10000000):
     # Read the data from the file, including the CRC column
     try:
         if input_file.name is None:
@@ -75,8 +74,8 @@ def process_data(input_file, slice_size=64, min_slice_size=16, sample_rate=20, w
     # Save the resulting DataFrame to a new file
     data.to_csv('output.csv', sep=";", na_rep="NaN", float_format="%.0f")

-    file, len_ = slice_csv_to_json('output.csv', slice_size, min_slice_size, sample_rate, window_size=window_size)
-
+    # file, len_ = slice_csv_to_json('output.csv', slice_size, min_slice_size, sample_rate, window_size=window_size)
+    file, len_ = slice_csv_to_json_v2('output.csv', slice_size, min_slice_size, sample_rate)
     # get the plot automatically
     sensor_fig, slice_fig, get_all_slice, slice_json, overlay_fig = plot_sensor_data_from_json(file, "GZ1")
     # overlay_fig = plot_overlay_data_from_json(file, ["GZ1", "GZ2", "GZ3", "GZ4"])
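To smoke-test the new slice_csv_to_json_v2 call path outside the Gradio UI, process_data() checks input_file.name, so any object exposing a .name path to a sensor CSV should do; the _Upload class and recording.csv below are hypothetical stand-ins:

    from funcs.processor import process_data

    class _Upload:  # stand-in for a Gradio file-upload object
        def __init__(self, name):
            self.name = name

    outputs = process_data(_Upload('recording.csv'), slice_size=64, min_slice_size=10)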
funcs/tools.py CHANGED
@@ -38,3 +38,23 @@ def upsample_signal(signal, upsample_factor):
     x_upsampled = np.linspace(0, signal.size - 1, signal.size * upsample_factor)
     return interpolator(x_upsampled)

+def upsample_signal_v2(signal, upsample_factor):
+    x = np.arange(signal.size)
+
+    # Calculate the second-order derivative
+    second_derivative = np.diff(signal, n=2)
+
+    # Count the number of non-zero second-order derivatives
+    non_zero_second_derivatives = np.count_nonzero(second_derivative)
+
+    # Choose the interpolation method adaptively
+    if non_zero_second_derivatives > signal.size * 0.5:
+        kind = 'quadratic'
+    elif non_zero_second_derivatives > signal.size * 0.25:
+        kind = 'cubic'
+    else:
+        kind = 'linear'
+
+    interpolator = interp1d(x, signal, kind=kind)
+    x_upsampled = np.linspace(0, signal.size - 1, signal.size * upsample_factor)
+    return interpolator(x_upsampled)
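A quick sanity check of the adaptive upsampler: a sine wave has a mostly non-zero second derivative, so the quadratic branch should be selected, and the output length is signal.size * upsample_factor:

    import numpy as np
    from funcs.tools import upsample_signal_v2

    signal = np.sin(np.linspace(0, 4 * np.pi, 32))
    upsampled = upsample_signal_v2(signal, upsample_factor=20)
    print(signal.size, '->', upsampled.size)  # 32 -> 640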