Spaces:

TroglodyteDerivations
/

Abby_Cadabby_WAV2VEC2_Alignment

Sleeping

App Files Community

TroglodyteDerivations committed on May 30, 2024

Commit

c5c0498

verified ·

1 Parent(s): 7768e3f

Updated lines 218-259

Browse files

Files changed (1) hide show

app.py +33 -33

app.py CHANGED Viewed

@@ -173,7 +173,7 @@ def plot_trellis_with_path(trellis, path):
     plt.tight_layout()
     return plt
-st.pyplot(plot_trellis_with_path(trellis, path))
 # Part J: Merge Repeats | Segments
 # Merge the labels
@@ -215,50 +215,50 @@ for seg in segments:
     st.write(seg)
 # Part K: Trellis with Segments Visualization
-def plot_trellis_with_segments(trellis, segments, transcript):
-    # To plot trellis with path, we take advantage of 'nan' value
     trellis_with_path = trellis.clone()
     for i, seg in enumerate(segments):
         if seg.label != "|":
             trellis_with_path[seg.start : seg.end, i] = float("nan")
-    fig, [ax1, ax2] = plt.subplots(2, 1, sharex=True, figsize=(15, 15))
-    ax1.set_title("Path, label and probability for each label")
     ax1.imshow(trellis_with_path.T, origin="lower", aspect="auto")
-    # Adjust the position of the annotations to spread them out
     for i, seg in enumerate(segments):
         if seg.label != "|":
-            ax1.annotate(seg.label, (seg.start, i - 0.3), size="small")
-            ax1.annotate(f"{seg.score:.2f}", (seg.start, i + 0.3), size="small")
-    ax2.set_title("Label probability with and without repetition")
-    xs, hs, ws = [], [], []
     for seg in segments:
         if seg.label != "|":
-            xs.append((seg.end + seg.start) / 2 + 0.4)
-            hs.append(seg.score)
-            ws.append(seg.end - seg.start)
-            ax2.annotate(seg.label, (seg.start + 0.8, -0.07), rotation=0)
-    ax2.bar(xs, hs, width=ws, color="gray", alpha=0.9, edgecolor="black")
-    xs, hs = [], []
-    for p in path:
-        label = transcript[p.token_index]
-        if label != "|":
-            xs.append(p.time_index + 1)
-            hs.append(p.score)
-    ax2.bar(xs, hs, width=0.9, alpha=0.9)
-    ax2.axhline(0, color="black")
-    ax2.grid(True, axis="y")
-    ax2.set_ylim(-0.1, 1.1)
     fig.tight_layout()
     return fig
-plot_trellis_with_segments(trellis, segments, updated_clean_UPPERCASE_transcript)
-st.pyplot(plot_trellis_with_segments(trellis, segments, updated_clean_UPPERCASE_transcript))
 # Part L: Merge words | Segments
 # Merge words

     plt.tight_layout()
     return plt
+st.pyplot(plt)
 # Part J: Merge Repeats | Segments
 # Merge the labels
     st.write(seg)
 # Part K: Trellis with Segments Visualization
+def plot_alignments(trellis, segments, word_segments, waveform=np.random.randn(1024), sample_rate=44100):
     trellis_with_path = trellis.clone()
     for i, seg in enumerate(segments):
         if seg.label != "|":
             trellis_with_path[seg.start : seg.end, i] = float("nan")
+    fig, [ax1, ax2] = plt.subplots(2, 1, figsize=(20, 18))
     ax1.imshow(trellis_with_path.T, origin="lower", aspect="auto")
+    ax1.set_facecolor("lightgray")
+    ax1.set_xticks([])
+    ax1.set_yticks([])
+    for word in word_segments:
+        ax1.axvspan(word.start - 0.5, word.end - 0.5, edgecolor="white", facecolor="none")
     for i, seg in enumerate(segments):
         if seg.label != "|":
+            ax1.annotate(seg.label, (seg.start, i - 0.7), size="small")
+            ax1.annotate(f"{seg.score:.2f}", (seg.start, i + 3), size="small")
+    # The original waveform
+    NFFT = 1024  # Adjust NFFT to be less than the length of the waveform
+    ratio = len(waveform) / sample_rate / trellis.size(0)
+    # Add a small offset to the waveform to avoid log of zero or negative numbers
+    waveform = waveform + 1e-10
+    ax2.specgram(waveform, Fs=sample_rate, NFFT=NFFT)
+    for word in word_segments:
+        x0 = ratio * word.start
+        x1 = ratio * word.end
+        ax2.axvspan(x0, x1, facecolor="none", edgecolor="white", hatch="/")
+        ax2.annotate(f"{word.score:.2f}", (x0, sample_rate * 0.51), annotation_clip=False)
     for seg in segments:
         if seg.label != "|":
+            ax2.annotate(seg.label, (seg.start * ratio, sample_rate * 0.55), annotation_clip=False)
+    ax2.set_xlabel("time [second]")
+    ax2.set_yticks([])
     fig.tight_layout()
     return fig
+st.pyplot(fig)
 # Part L: Merge words | Segments
 # Merge words