Commit: adding the status and the score

Files changed:
- app.py (+42, -15)
- funcs/convertors.py (+111, -0)
- funcs/processor.py (+5, -6)
- funcs/tools.py (+20, -0)
app.py (CHANGED)

@@ -21,6 +21,46 @@ reducer10d.load('models/r10d_6.pth')
 cluster_som = ClusterSOM()
 cluster_som.load("models/cluster_som6.pkl")
 
+def score(self, data, midpoints=None, threshold_radius=4):
+    """
+    Compute the score for each sample in the data, based on the distance of the
+    BMU node to the closest midpoint of the SOM grid.
+
+    :param data: The input data.
+    :param midpoints: A dictionary with keys as the indices of the SOMs and values
+        as lists of midpoints on the grid for the corresponding SOMs.
+    :param threshold_radius: The threshold radius for score calculation.
+    """
+    scores = []
+
+    for sample in data:
+        # Predict the cluster and the BMU SOM coordinate for each sample
+        result = self.predict([sample])[0]
+
+        # Check that the sample is not noise
+        if result[0] != -1:
+            # The activated SOM's index and its corresponding BMU
+            activated_som_index, bmu = result[0], result[1]
+
+            # Get the corresponding SOM for the data point
+            som = self.som_models[activated_som_index]
+
+            # If specific midpoints are provided for this SOM, use them;
+            # otherwise use the midpoint of the SOM grid
+            if midpoints is not None and activated_som_index in midpoints:
+                specified_midpoints = midpoints[activated_som_index]
+            else:
+                specified_midpoints = [tuple((dim - 1) / 2 for dim in som.get_weights().shape[:2])]
+
+            # Compute the grid distance from the BMU to each midpoint and take the minimum
+            min_distance = min(np.sqrt((midpoint[0] - bmu[0])**2 + (midpoint[1] - bmu[1])**2) for midpoint in specified_midpoints)
+
+            # Compute the score as the minimum grid distance minus the threshold radius
+            score = min_distance - threshold_radius
+
+            scores.append(score)
+        else:
+            scores.append(None)  # Noise
+
+    return scores
+
 def map_som2animation(som_value):
     mapping = {
         2: 0,  # walk

@@ -33,19 +73,6 @@ def map_som2animation(som_value):
 
     return mapping.get(som_value, None)
 
-# def map_som2animation_v2(som_value):
-#     mapping = {
-#         versammelter_trab: center of SOM-1,
-#         arbeits-trab: south-east of SOM-1,
-#         mittels-trab: North of SOM-1,
-#         starker-trab: North-west of SOM-1,
-
-#         starker-schritt:
-
-#     }
-
-#     return mapping.get(som_value, None)
-
 def deviation_scores(tensor_data, scale=50):
     if len(tensor_data) < 5:
         raise ValueError("The input tensor must have at least 5 elements.")

@@ -141,13 +168,13 @@ def get_som_mp4_v2(csv_file_box, slice_size_slider, sample_rate, window_size_sli
     csv_writer.writerow(header)
     csv_writer.writerows(processed_data)
 
-
+    os.system('curl -X POST -F "csv_file=@animation_table.csv" https://metric-space.ngrok.io/generate --output animation.mp4')
 
     # prediction = cluster_som.predict(embedding10d)
     som_video = cluster.plot_activation(embedding10d)
    som_video.write_videofile('som_sequence.mp4')
 
-
+    return processed_file_box, json_file_box, slices_per_leg, plot_box_leg, plot_box_overlay, slice_slider, plot_slice_leg, get_all_slice, slice_json_box, 'som_sequence.mp4', 'animation.mp4'
     return processed_file_box, json_file_box, slices_per_leg, plot_box_leg, plot_box_overlay, slice_slider, plot_slice_leg, get_all_slice, slice_json_box, 'som_sequence.mp4', None
 
 # ml inference
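Two details stand out in these hunks: score is written with a self parameter, so it reads as a ClusterSOM method even though the diff places it at module level in app.py, and the new return that hands back 'animation.mp4' makes the pre-existing return ... None line directly below it unreachable. A minimal sketch of how the scoring might be used, assuming score is attached to the loaded cluster_som and that embedding10d is the 10-dimensional embedding array used elsewhere in this file (both names are taken from this commit; the loop below is illustrative only):

# Sketch only: assumes score() is bound to ClusterSOM and that embedding10d
# is an (n_samples, 10) array of reduced feature vectors.
scores = cluster_som.score(embedding10d, threshold_radius=4)

for i, s in enumerate(scores):
    if s is None:
        print(f"sample {i}: noise (no SOM activated)")
    else:
        # s <= 0 means the BMU lies within the threshold radius of a midpoint
        status = "ok" if s <= 0 else "outside threshold"
        print(f"sample {i}: {status} (score = {s:.2f})")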
funcs/convertors.py (CHANGED)

@@ -3,6 +3,7 @@ from funcs.tools import upsample_signal
 from funcs.tools import process_signals
 from funcs.tools import numpy_to_native
 from funcs.plot_func import plot_slices
+from funcs.tools import upsample_signal_v2
 from funcs.tools import fill_missing_values
 
 import json

@@ -88,4 +89,114 @@ def slice_csv_to_json(input_file, slice_size=64, min_slice_size=16, sample_rate=
     if debug:
         plot_slices(original_data[gz_columns[0]], data[gz_columns[0]], precise_slice_points, precise_slice_points, sample_rate, data.index.values[0])
 
+    return 'output.json', len(slices)
+
+
+def slice_csv_to_json_v2(input_file, slice_size=64, min_slice_size=10, sample_rate=20):
+    data = pd.read_csv(input_file, delimiter=";", index_col=0)
+    original_data = data.copy()
+    data = fill_missing_values(data)
+    data.fillna(0, inplace=True)
+
+    gz_columns = [col for col in data.columns if col.startswith("GZ")]
+    all_peaks = []
+    upsample_factor = sample_rate
+    combined_smoothed_signals_upsampled = np.zeros(upsample_signal_v2(data[gz_columns[0]].values, upsample_factor).size, dtype=float)
+
+    process_signals_failed = False
+    for gz_col in gz_columns:
+        gz_signal = data[gz_col].values
+
+        try:
+            upsampled_smoothed_signal, peaks = process_signals(gz_signal, upsample_factor)
+        except Exception:  # Catch any exception from process_signals
+            process_signals_failed = True
+            break
+
+        if upsampled_smoothed_signal is None or peaks is None:
+            # Empty or too-short signal; skip this column
+            continue
+
+        all_peaks.append(peaks)
+        combined_smoothed_signals_upsampled += upsampled_smoothed_signal
+
+    if process_signals_failed or not any(len(peaks) > 0 for peaks in all_peaks):
+        # Fall back to fixed-size slicing when peak detection fails
+        precise_loop_points = list(range(0, len(data), slice_size))
+    else:
+        precise_loop_points = [np.mean(peaks) for peaks in zip(*all_peaks)]
+
+    precise_slice_points = []
+    for i in range(len(precise_loop_points) - 1, -1, -1):
+        interval_end = int(precise_loop_points[i])
+        if i > 0:
+            interval_start = int(precise_loop_points[i - 1])
+        else:
+            interval_start = 0
+
+        max_value_index = np.argmax(combined_smoothed_signals_upsampled[interval_start:interval_end]) + interval_start
+        precise_slice_points.append(max_value_index)
+
+    precise_slice_points.reverse()
+
+    slices = []
+    start_index = 0
+    for i, precise_slice_point in enumerate(precise_slice_points):
+        end_index = round(precise_slice_point / upsample_factor)
+        if i == 0:
+            start_index = end_index
+            continue
+
+        if end_index - start_index >= min_slice_size:
+            if i == len(precise_slice_points) - 1 and end_index - start_index < slice_size:
+                break
+
+            slice_data = data.iloc[start_index:end_index].to_dict("list")
+
+            if i > 1 and not process_signals_failed:
+                precise_time_diff = precise_slice_point - precise_slice_points[i - 1]
+                precise_timestamp = slices[-1]["precise_timestamp"] + precise_time_diff
+
+                timestamp = data.index.values[(np.abs(data.index.values - precise_timestamp)).argmin()]
+
+                time_diff = timestamp - slices[-1]["timestamp"]
+
+                for j, gz_col in enumerate(gz_columns):
+                    slice_data[f"{gz_col}_precise_time_diff"] = all_peaks[j][i] - all_peaks[j][i - 1]
+            else:
+                precise_timestamp = data.index.values[start_index]
+                timestamp = precise_timestamp
+                time_diff = None
+                precise_time_diff = None
+
+                for j, gz_col in enumerate(gz_columns):
+                    slice_data[f"{gz_col}_precise_timestamp"] = all_peaks[j][0] if not process_signals_failed else None
+                    slice_data[f"{gz_col}_precise_time_diff"] = None
+
+            slice_data["precise_timestamp"] = precise_timestamp
+            slice_data["timestamp"] = timestamp
+            slice_data["time_diff"] = time_diff
+            slice_data["precise_time_diff"] = precise_time_diff
+
+            if end_index - start_index < slice_size:
+                # Pad short slices at the front with the preceding samples
+                pad_size = slice_size - (end_index - start_index)
+                for key in slice_data:
+                    if key not in ["timestamp", "time_diff", "precise_timestamp", "precise_time_diff"] and not key.endswith("_precise_timestamp") and not key.endswith("_precise_time_diff"):
+                        slice_data[key] = data[key].iloc[start_index - pad_size:start_index].tolist() + slice_data[key]
+            else:
+                # Truncate long slices to exactly slice_size samples
+                for key in slice_data:
+                    if key not in ["timestamp", "time_diff", "precise_timestamp", "precise_time_diff"] and not key.endswith("_precise_timestamp") and not key.endswith("_precise_time_diff"):
+                        slice_data[key] = slice_data[key][:slice_size]
+            slices.append(slice_data)
+
+        start_index = end_index
+
+    with open('output.json', "w") as f:
+        json.dump(numpy_to_native(slices), f, indent=2)
+
     return 'output.json', len(slices)
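slice_csv_to_json_v2 drops the window_size parameter of the original slicer, sizes its combined-signal buffer with the adaptive upsample_signal_v2, and falls back to fixed slice_size intervals when peak detection fails. A minimal usage sketch, assuming 'output.csv' is in the same semicolon-separated, GZ-column format that process_data writes (the file name is taken from this commit; nothing else is):

from funcs.convertors import slice_csv_to_json_v2

# 'output.csv' is assumed to be the semicolon-separated sensor dump written by
# process_data, with gyroscope columns named GZ1, GZ2, ...
json_path, n_slices = slice_csv_to_json_v2('output.csv', slice_size=64,
                                           min_slice_size=10, sample_rate=20)
print(f"wrote {n_slices} slices to {json_path}")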
funcs/processor.py (CHANGED)

@@ -2,11 +2,10 @@ import numpy as np
 import pandas as pd
 import gradio as gr
 
-from funcs.convertors import slice_csv_to_json
-from funcs.plot_func import plot_sensor_data_from_json
-from funcs.ml_inference import get_som_mp4
+from funcs.convertors import slice_csv_to_json, slice_csv_to_json_v2
+from funcs.plot_func import plot_sensor_data_from_json
 
-def process_data(input_file, slice_size=64, min_slice_size=16, sample_rate=20, window_size=40, threshold=1000, span_limit=10000000):
+def process_data(input_file, slice_size=64, min_slice_size=10, sample_rate=20, window_size=40, threshold=1000, span_limit=10000000):
     # Read the data from the file, including the CRC column
     try:
         if input_file.name is None:

@@ -75,8 +74,8 @@ def process_data(input_file, slice_size=64, min_slice_size=16, sample_rate=20, w
     # Save the resulting DataFrame to a new file
     data.to_csv('output.csv', sep=";", na_rep="NaN", float_format="%.0f")
 
-    file, len_ = slice_csv_to_json('output.csv', slice_size, min_slice_size, sample_rate, window_size=window_size)
-
+    # file, len_ = slice_csv_to_json('output.csv', slice_size, min_slice_size, sample_rate, window_size=window_size)
+    file, len_ = slice_csv_to_json_v2('output.csv', slice_size, min_slice_size, sample_rate)
     # get the plot automatically
     sensor_fig, slice_fig, get_all_slice, slice_json, overlay_fig = plot_sensor_data_from_json(file, "GZ1")
     # overlay_fig = plot_overlay_data_from_json(file, ["GZ1", "GZ2", "GZ3", "GZ4"])
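With this change process_data slices through slice_csv_to_json_v2, so window_size is still accepted but no longer forwarded to the slicer. A sketch of invoking it outside the Gradio UI, assuming only what the shown code requires, namely that the input object exposes a .name attribute the way Gradio's file component does; recording.csv is a hypothetical local file and the shape of the returned outputs is not shown in this diff:

from types import SimpleNamespace
from funcs.processor import process_data

# Stand-in for a Gradio file upload; the code shown here only reads input_file.name.
fake_upload = SimpleNamespace(name="recording.csv")  # hypothetical local CSV
outputs = process_data(fake_upload, slice_size=64, min_slice_size=10, sample_rate=20)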
funcs/tools.py (CHANGED)

@@ -38,3 +38,23 @@ def upsample_signal(signal, upsample_factor):
 x_upsampled = np.linspace(0, signal.size - 1, signal.size * upsample_factor)
     return interpolator(x_upsampled)
 
+def upsample_signal_v2(signal, upsample_factor):
+    x = np.arange(signal.size)
+
+    # Calculate the second-order derivative
+    second_derivative = np.diff(signal, n=2)
+
+    # Count the number of non-zero second-order derivatives
+    non_zero_second_derivatives = np.count_nonzero(second_derivative)
+
+    # Choose the interpolation method adaptively
+    if non_zero_second_derivatives > signal.size * 0.5:
+        kind = 'quadratic'
+    elif non_zero_second_derivatives > signal.size * 0.25:
+        kind = 'cubic'
+    else:
+        kind = 'linear'
+
+    interpolator = interp1d(x, signal, kind=kind)
+    x_upsampled = np.linspace(0, signal.size - 1, signal.size * upsample_factor)
+    return interpolator(x_upsampled)
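A quick sketch of the adaptive upsampler on a toy signal. The choice of kind depends on how many samples have a non-zero second difference, so a smooth sinusoid like this one exceeds the 0.5 ratio and selects the 'quadratic' interpolant; the toy signal itself is illustrative, not from the commit:

import numpy as np
from funcs.tools import upsample_signal_v2

signal = np.sin(np.linspace(0, 2 * np.pi, 32))   # toy 32-sample signal
upsampled = upsample_signal_v2(signal, upsample_factor=20)
print(upsampled.size)  # 32 * 20 = 640 interpolated samples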