Spaces:

Labbeti
/

conette

Running

App Files Files Community

Labbeti committed on Nov 24, 2023

Commit

c37029b

1 Parent(s): b8c79c9

Mod: Refactor model forward.

Browse files

Files changed (1) hide show

app.py +53 -37

app.py CHANGED Viewed

@@ -1,6 +1,8 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 from tempfile import NamedTemporaryFile
 from typing import Any
@@ -20,12 +22,56 @@ def format_cand(cand: str) -> str:
     return f"{cand[0].title()}{cand[1:]}."
 def main() -> None:
     st.header("Describe audio content with CoNeTTE")
     model = load_conette(model_kwds=dict(device="cpu"))
-    st.warning("Recommanded audio: lasting from **1 to 30s**, sampled at **32 kHz** minimum.")
     audios = st.file_uploader(
         "**Upload audio files here:**",
         type=["wav", "flac", "mp3", "ogg", "avi"],
@@ -78,7 +124,7 @@ def main() -> None:
             )
         del allow_rep_mode
-        kwargs: dict[str, Any] = dict(
             task=task,
             beam_size=beam_size,
             min_pred_size=min_pred_size,
@@ -87,39 +133,7 @@ def main() -> None:
         )
     if audios is not None and len(audios) > 0:
-        audio_to_predict = []
-        cands = [""] * len(audios)
-        tmp_files = []
-        tmp_fpaths = []
-        audio_fnames = []
-        for i, audio in enumerate(audios):
-            audio_fname = audio.name
-            audio_fnames.append(audio_fname)
-            cand_key = f"{audio_fname}-{kwargs}"
-            if cand_key in st.session_state:
-                cand = st.session_state[cand_key]
-                cands[i] = cand
-            else:
-                tmp_file = NamedTemporaryFile()
-                tmp_file.write(audio.getvalue())
-                tmp_files.append(tmp_file)
-                audio_to_predict.append((i, cand_key, tmp_file))
-                tmp_fpath = tmp_file.name
-                tmp_fpaths.append(tmp_fpath)
-        if len(tmp_fpaths) > 0:
-            outputs = model(
-                tmp_fpaths,
-                **kwargs,
-            )
-            for i, (j, cand_key, tmp_file) in enumerate(audio_to_predict):
-                cand = outputs["cands"][i]
-                cands[j] = cand
-                st.session_state[cand_key] = cand
-                tmp_file.close()
         for audio_fname, cand in zip(audio_fnames, cands):
             st.success(f"**Output for {audio_fname}:**\n- {format_cand(cand)}")
@@ -127,10 +141,12 @@ def main() -> None:
     if len(record) > 0:
         outputs = model(
             record_fpath,
-            **kwargs,
         )
         cand = outputs["cands"][0]
-        st.success(f"**Output for {'test'}:**\n- {format_cand(cand)}")
 if __name__ == "__main__":

 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
+import os.path as osp
 from tempfile import NamedTemporaryFile
 from typing import Any
     return f"{cand[0].title()}{cand[1:]}."
+def get_results(
+    model: CoNeTTEModel,
+    audios: list,
+    generate_kwds: dict[str, Any],
+) -> tuple[list[str], list[str]]:
+    """Caption uploaded audio files, reusing captions cached in the session.
+
+    Args:
+        model: Captioning model. It is called once with the list of temporary
+            file paths plus ``generate_kwds``, and its output is read through
+            the ``"cands"`` key.
+        audios: Uploaded files; each must expose ``.name`` and ``.getvalue()``.
+        generate_kwds: Keyword arguments forwarded to the model. Their string
+            form is also part of the per-file cache key.
+
+    Returns:
+        ``(audio_fnames, cands)``: the uploaded file names and their captions,
+        both in the same order as ``audios``.
+    """
+    audio_fnames = [audio.name for audio in audios]
+    cands = [""] * len(audios)
+    # One (index in `audios`, cache key, open temp file) entry per upload that
+    # has no cached caption yet. Holding the file objects keeps them alive.
+    audio_to_predict = []
+    try:
+        for i, audio in enumerate(audios):
+            cand_key = f"{audio.name}-{generate_kwds}"
+            if cand_key in st.session_state:
+                cands[i] = st.session_state[cand_key]
+                continue
+            tmp_file = NamedTemporaryFile()
+            # Register before writing so the `finally` below also cleans up
+            # this file if the write fails.
+            audio_to_predict.append((i, cand_key, tmp_file))
+            tmp_file.write(audio.getvalue())
+            # The model receives the path, not this handle: flush so the
+            # buffered bytes are on disk before the path is reopened.
+            tmp_file.flush()
+        if audio_to_predict:
+            tmp_fpaths = [tmp_file.name for _, _, tmp_file in audio_to_predict]
+            outputs = model(
+                tmp_fpaths,
+                **generate_kwds,
+            )
+            for i, (j, cand_key, _) in enumerate(audio_to_predict):
+                cand = outputs["cands"][i]
+                cands[j] = cand
+                st.session_state[cand_key] = cand
+    finally:
+        # Closing a NamedTemporaryFile also deletes it; do this even when the
+        # model call raises so no handles or files are leaked.
+        for _, _, tmp_file in audio_to_predict:
+            tmp_file.close()
+    return audio_fnames, cands
 def main() -> None:
     st.header("Describe audio content with CoNeTTE")
     model = load_conette(model_kwds=dict(device="cpu"))
+    st.warning(
+        "Recommanded audio: lasting from **1 to 30s**, sampled at **32 kHz** minimum."
+    )
     audios = st.file_uploader(
         "**Upload audio files here:**",
         type=["wav", "flac", "mp3", "ogg", "avi"],
             )
         del allow_rep_mode
+        generate_kwds: dict[str, Any] = dict(
             task=task,
             beam_size=beam_size,
             min_pred_size=min_pred_size,
         )
     if audios is not None and len(audios) > 0:
+        audio_fnames, cands = get_results(model, audios, generate_kwds)
         for audio_fname, cand in zip(audio_fnames, cands):
             st.success(f"**Output for {audio_fname}:**\n- {format_cand(cand)}")
     if len(record) > 0:
         outputs = model(
             record_fpath,
+            **generate_kwds,
         )
         cand = outputs["cands"][0]
+        st.success(
+            f"**Output for {osp.basename(record_fpath)}:**\n- {format_cand(cand)}"
+        )
 if __name__ == "__main__":