arcan3 committed
Commit b37ce89 · Parent: 7a69981

adding the status and the score

Files changed (4):
  1. app.py +42 -15
  2. funcs/convertors.py +111 -0
  3. funcs/processor.py +5 -6
  4. funcs/tools.py +20 -0
app.py CHANGED
@@ -21,6 +21,46 @@ reducer10d.load('models/r10d_6.pth')
 cluster_som = ClusterSOM()
 cluster_som.load("models/cluster_som6.pkl")

+def score(self, data, midpoints=None, threshold_radius=4):
+    """
+    Compute the score for each sample in the data, based on the grid distance from the BMU node to the closest midpoint of the SOM grid.
+
+    :param data: The input data.
+    :param midpoints: A dictionary keyed by SOM index, whose values are lists of grid midpoints for the corresponding SOMs.
+    :param threshold_radius: The threshold radius used in the score calculation.
+    """
+    scores = []
+
+    for sample in data:
+        # Predict the cluster and the BMU SOM coordinate for the sample
+        result = self.predict([sample])[0]
+
+        # Only score samples that are not noise
+        if result[0] != -1:
+            # The activated SOM's index and its corresponding BMU
+            activated_som_index, bmu = result[0], result[1]
+
+            # Get the corresponding SOM for the data point
+            som = self.som_models[activated_som_index]
+
+            # If specific midpoints are provided for this SOM, use them; otherwise fall back to the center of the SOM grid
+            if midpoints is not None and activated_som_index in midpoints:
+                specified_midpoints = midpoints[activated_som_index]
+            else:
+                specified_midpoints = [tuple((dim - 1) / 2 for dim in som.get_weights().shape[:2])]
+
+            # Compute the grid distance from the BMU to each midpoint and take the minimum
+            min_distance = min(np.sqrt((midpoint[0] - bmu[0])**2 + (midpoint[1] - bmu[1])**2) for midpoint in specified_midpoints)
+
+            # The score is the minimum grid distance minus the threshold radius
+            score = min_distance - threshold_radius
+
+            scores.append(score)
+        else:
+            scores.append(None)  # Noise
+
+    return scores
+
 def map_som2animation(som_value):
     mapping = {
         2: 0,  # walk
@@ -33,19 +73,6 @@ def map_som2animation(som_value):

     return mapping.get(som_value, None)

-# def map_som2animation_v2(som_value):
-#     mapping = {
-#         versammelter_trab: center of SOM-1,
-#         arbeits-trab: south-east of SOM-1,
-#         mittels-trab: North of SOM-1,
-#         starker-trab: North-west of SOM-1,
-
-#         starker-schritt:
-
-#     }
-
-#     return mapping.get(som_value, None)
-
 def deviation_scores(tensor_data, scale=50):
     if len(tensor_data) < 5:
         raise ValueError("The input tensor must have at least 5 elements.")
@@ -141,13 +168,12 @@ def get_som_mp4_v2(csv_file_box, slice_size_slider, sample_rate, window_size_sli
     csv_writer.writerow(header)
     csv_writer.writerows(processed_data)

-    # os.system('curl -X POST -F "csv_file=@animation_table.csv" https://metric-space.ngrok.io/generate --output animation.mp4')
+    os.system('curl -X POST -F "csv_file=@animation_table.csv" https://metric-space.ngrok.io/generate --output animation.mp4')

     # prediction = cluster_som.predict(embedding10d)
     som_video = cluster.plot_activation(embedding10d)
     som_video.write_videofile('som_sequence.mp4')

-    # return processed_file_box, json_file_box, slices_per_leg, plot_box_leg, plot_box_overlay, slice_slider, plot_slice_leg, get_all_slice, slice_json_box, 'som_sequence.mp4', 'animation.mp4'
-    return processed_file_box, json_file_box, slices_per_leg, plot_box_leg, plot_box_overlay, slice_slider, plot_slice_leg, get_all_slice, slice_json_box, 'som_sequence.mp4', None
+    return processed_file_box, json_file_box, slices_per_leg, plot_box_leg, plot_box_overlay, slice_slider, plot_slice_leg, get_all_slice, slice_json_box, 'som_sequence.mp4', 'animation.mp4'

 # ml inference
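The new score() helper takes self, which suggests it is intended as a ClusterSOM method rather than a module-level function in app.py. A minimal usage sketch under that assumption, reusing the cluster_som instance loaded above; embedding10d stands for the 10-dimensional embeddings produced by reducer10d, and the status labels are inferred from the code, not from the commit:

    # Hypothetical usage; assumes score() is bound to the ClusterSOM instance.
    scores = cluster_som.score(embedding10d, threshold_radius=4)

    # None marks noise samples; a score <= 0 means the BMU landed within the
    # threshold radius of the nearest grid midpoint.
    statuses = [None if s is None else ("ok" if s <= 0 else "off-center") for s in scores]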
funcs/convertors.py CHANGED
@@ -3,6 +3,7 @@ from funcs.tools import upsample_signal
 from funcs.tools import process_signals
 from funcs.tools import numpy_to_native
 from funcs.plot_func import plot_slices
+from funcs.tools import upsample_signal_v2
 from funcs.tools import fill_missing_values

 import json
@@ -88,4 +89,109 @@ def slice_csv_to_json(input_file, slice_size=64, min_slice_size=16, sample_rate=
     if debug:
         plot_slices(original_data[gz_columns[0]], data[gz_columns[0]], precise_slice_points, precise_slice_points, sample_rate, data.index.values[0])

+    return 'output.json', len(slices)
+
+
+def slice_csv_to_json_v2(input_file, slice_size=64, min_slice_size=10, sample_rate=20):
+    data = pd.read_csv(input_file, delimiter=";", index_col=0)
+    original_data = data.copy()
+    data = fill_missing_values(data)
+    data.fillna(0, inplace=True)
+
+    gz_columns = [col for col in data.columns if col.startswith("GZ")]
+    all_peaks = []
+    upsample_factor = sample_rate
+    combined_smoothed_signals_upsampled = np.zeros(upsample_signal_v2(data[gz_columns[0]].values, upsample_factor).size, dtype=float)
+
+    process_signals_failed = False
+    for gz_col in gz_columns:
+        gz_signal = data[gz_col].values
+
+        # Any failure inside process_signals() falls back to fixed-size slicing
+        try:
+            upsampled_smoothed_signal, peaks = process_signals(gz_signal, upsample_factor)
+        except Exception:
+            process_signals_failed = True
+            break
+
+        # Skip empty or too-short signals
+        if upsampled_smoothed_signal is None or peaks is None:
+            continue
+
+        all_peaks.append(peaks)
+        combined_smoothed_signals_upsampled += upsampled_smoothed_signal
+
+    if process_signals_failed or not any(len(peaks) > 0 for peaks in all_peaks):
+        precise_loop_points = list(range(0, len(data), slice_size))
+    else:
+        precise_loop_points = [np.mean(peaks) for peaks in zip(*all_peaks)]
+
+    precise_slice_points = []
+    for i in range(len(precise_loop_points) - 1, -1, -1):
+        interval_end = int(precise_loop_points[i])
+        if i > 0:
+            interval_start = int(precise_loop_points[i - 1])
+        else:
+            interval_start = 0
+
+        max_value_index = np.argmax(combined_smoothed_signals_upsampled[interval_start:interval_end]) + interval_start
+        precise_slice_points.append(max_value_index)
+
+    precise_slice_points.reverse()
+
+    slices = []
+    start_index = 0
+    for i, precise_slice_point in enumerate(precise_slice_points):
+        end_index = round(precise_slice_point / upsample_factor)
+        if i == 0:
+            start_index = end_index
+            continue
+
+        if end_index - start_index >= min_slice_size:
+            if i == len(precise_slice_points) - 1 and end_index - start_index < slice_size:
+                break
+
+            slice_data = data.iloc[start_index:end_index].to_dict("list")
+
+            if i > 1 and not process_signals_failed:
+                precise_time_diff = precise_slice_point - precise_slice_points[i - 1]
+                precise_timestamp = slices[-1]["precise_timestamp"] + precise_time_diff
+
+                timestamp = data.index.values[(np.abs(data.index.values - precise_timestamp)).argmin()]

+                time_diff = timestamp - slices[-1]["timestamp"]
+
+                for j, gz_col in enumerate(gz_columns):
+                    slice_data[f"{gz_col}_precise_time_diff"] = all_peaks[j][i] - all_peaks[j][i - 1]
+            else:
+                precise_timestamp = data.index.values[start_index]
+                timestamp = precise_timestamp
+                time_diff = None
+                precise_time_diff = None
+
+                for j, gz_col in enumerate(gz_columns):
+                    slice_data[f"{gz_col}_precise_timestamp"] = all_peaks[j][0] if not process_signals_failed else None
+                    slice_data[f"{gz_col}_precise_time_diff"] = None
+
+            slice_data["precise_timestamp"] = precise_timestamp
+            slice_data["timestamp"] = timestamp
+            slice_data["time_diff"] = time_diff
+            slice_data["precise_time_diff"] = precise_time_diff
+
+            if end_index - start_index < slice_size:
+                pad_size = slice_size - (end_index - start_index)
+                for key in slice_data:
+                    if key not in ["timestamp", "time_diff", "precise_timestamp", "precise_time_diff"] and not key.endswith("_precise_timestamp") and not key.endswith("_precise_time_diff"):
+                        slice_data[key] = data[key].iloc[start_index - pad_size:start_index].tolist() + slice_data[key]
+            else:
+                for key in slice_data:
+                    if key not in ["timestamp", "time_diff", "precise_timestamp", "precise_time_diff"] and not key.endswith("_precise_timestamp") and not key.endswith("_precise_time_diff"):
+                        slice_data[key] = slice_data[key][:slice_size]
+            slices.append(slice_data)
+
+        start_index = end_index
+
+    with open('output.json', "w") as f:
+        json.dump(numpy_to_native(slices), f, indent=2)
+
     return 'output.json', len(slices)
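A minimal driver for the new slicer, assuming output.csv is the semicolon-delimited file written by process_data() in funcs/processor.py:

    from funcs.convertors import slice_csv_to_json_v2

    # Peak-aligned slicing; falls back to fixed-size windows if process_signals() fails.
    json_path, n_slices = slice_csv_to_json_v2('output.csv', slice_size=64, min_slice_size=10, sample_rate=20)
    print(f'{n_slices} slices written to {json_path}')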
funcs/processor.py CHANGED
@@ -2,11 +2,10 @@ import numpy as np
 import pandas as pd
 import gradio as gr

-from funcs.convertors import slice_csv_to_json
-from funcs.plot_func import plot_sensor_data_from_json, plot_overlay_data_from_json
-from funcs.ml_inference import get_som_mp4
+from funcs.convertors import slice_csv_to_json, slice_csv_to_json_v2
+from funcs.plot_func import plot_sensor_data_from_json

-def process_data(input_file, slice_size=64, min_slice_size=16, sample_rate=20, window_size=40, threshold=1000, span_limit=10000000):
+def process_data(input_file, slice_size=64, min_slice_size=10, sample_rate=20, window_size=40, threshold=1000, span_limit=10000000):
     # Read the data from the file, including the CRC column
     try:
         if input_file.name is None:
@@ -75,8 +74,8 @@ def process_data(input_file, slice_size=64, min_slice_size=16, sample_rate=20, w
     # Save the resulting DataFrame to a new file
     data.to_csv('output.csv', sep=";", na_rep="NaN", float_format="%.0f")

-    file, len_ = slice_csv_to_json('output.csv', slice_size, min_slice_size, sample_rate, window_size=window_size)
-
+    # file, len_ = slice_csv_to_json('output.csv', slice_size, min_slice_size, sample_rate, window_size=window_size)
+    file, len_ = slice_csv_to_json_v2('output.csv', slice_size, min_slice_size, sample_rate)
     # get the plot automatically
     sensor_fig, slice_fig, get_all_slice, slice_json, overlay_fig = plot_sensor_data_from_json(file, "GZ1")
     # overlay_fig = plot_overlay_data_from_json(file, ["GZ1", "GZ2", "GZ3", "GZ4"])
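To smoke-test the new slice_csv_to_json_v2 call path outside the Gradio UI, process_data() checks input_file.name, so any object exposing a .name path to a sensor CSV should do; the _Upload class and recording.csv below are hypothetical stand-ins:

    from funcs.processor import process_data

    class _Upload:  # stand-in for a Gradio file-upload object
        def __init__(self, name):
            self.name = name

    outputs = process_data(_Upload('recording.csv'), slice_size=64, min_slice_size=10)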
funcs/tools.py CHANGED
@@ -38,3 +38,23 @@ def upsample_signal(signal, upsample_factor):
     x_upsampled = np.linspace(0, signal.size - 1, signal.size * upsample_factor)
     return interpolator(x_upsampled)

+def upsample_signal_v2(signal, upsample_factor):
+    x = np.arange(signal.size)
+
+    # Calculate the second-order derivative
+    second_derivative = np.diff(signal, n=2)
+
+    # Count the number of non-zero second-order derivatives
+    non_zero_second_derivatives = np.count_nonzero(second_derivative)
+
+    # Choose the interpolation method adaptively
+    if non_zero_second_derivatives > signal.size * 0.5:
+        kind = 'quadratic'
+    elif non_zero_second_derivatives > signal.size * 0.25:
+        kind = 'cubic'
+    else:
+        kind = 'linear'
+
+    interpolator = interp1d(x, signal, kind=kind)
+    x_upsampled = np.linspace(0, signal.size - 1, signal.size * upsample_factor)
+    return interpolator(x_upsampled)
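A quick sanity check of the adaptive upsampler: a sine wave has a mostly non-zero second derivative, so the quadratic branch should be selected, and the output length is signal.size * upsample_factor:

    import numpy as np
    from funcs.tools import upsample_signal_v2

    signal = np.sin(np.linspace(0, 4 * np.pi, 32))
    upsampled = upsample_signal_v2(signal, upsample_factor=20)
    print(signal.size, '->', upsampled.size)  # 32 -> 640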