Spaces:

Labbeti
/

conette

Running

Labbeti committed on Nov 17, 2023

Commit

a480cb3

1 Parent(s): 83af184

Mod: Cache candidate results in memory to avoid re-computation.

Files changed (1) hide show

app.py CHANGED Viewed

@@ -2,6 +2,7 @@
 # -*- coding: utf-8 -*-
 from tempfile import NamedTemporaryFile
 import streamlit as st
@@ -14,7 +15,8 @@ def load_conette(*args, **kwargs) -> CoNeTTEModel:
 def main() -> None:
-    st.header("CoNeTTE model test")
     model = load_conette(model_kwds=dict(device="cpu"))
     task = st.selectbox("Task embedding input", model.tasks, 0)
@@ -34,6 +36,7 @@ def main() -> None:
         model.config.max_pred_size,
     )
     audios = st.file_uploader(
         "Upload an audio file",
         type=["wav", "flac", "mp3", "ogg", "avi"],
@@ -45,14 +48,24 @@ def main() -> None:
             with NamedTemporaryFile() as temp:
                 temp.write(audio.getvalue())
                 fpath = temp.name
-                outputs = model(
-                    fpath,
                     task=task,
                     beam_size=beam_size,
                     min_pred_size=min_pred_size,
                     max_pred_size=max_pred_size,
                 )
-                cand = outputs["cands"][0]
                 st.write(f"Output for {audio.name}:")
                 st.write(" - ", cand)

 # -*- coding: utf-8 -*-
 from tempfile import NamedTemporaryFile
+from typing import Any
 import streamlit as st
 def main() -> None:
+    st.header("Describe audio content with CoNeTTE")
     model = load_conette(model_kwds=dict(device="cpu"))
     task = st.selectbox("Task embedding input", model.tasks, 0)
         model.config.max_pred_size,
     )
+    st.write("Recommanded audio: lasting from 1s to 30s, sampled at 32 kHz.")
     audios = st.file_uploader(
         "Upload an audio file",
         type=["wav", "flac", "mp3", "ogg", "avi"],
             with NamedTemporaryFile() as temp:
                 temp.write(audio.getvalue())
                 fpath = temp.name
+                kwargs: dict[str, Any] = dict(
                     task=task,
                     beam_size=beam_size,
                     min_pred_size=min_pred_size,
                     max_pred_size=max_pred_size,
                 )
+                cand_key = f"{audio.name}-{kwargs}"
+                if cand_key in st.session_state:
+                    cand = st.session_state[cand_key]
+                else:
+                    outputs = model(
+                        fpath,
+                        **kwargs,
+                    )
+                    cand = outputs["cands"][0]
+                    st.session_state[cand_key] = cand
                 st.write(f"Output for {audio.name}:")
                 st.write(" - ", cand)