IanRonk committed
Commit 7198a8e
1 Parent(s): 1ac0382

Add functions

Files changed (3)
  1. app.py +5 -1
  2. functions/model_infer.py +39 -0
  3. requirements.txt +3 -0
app.py CHANGED
@@ -1,6 +1,8 @@
 from os import pipe
 import gradio as gr
 from functions.punctuation import punctuate
+from functions.model_infer import predict_from_document
+
 
 title = "sponsoredBye - never listen to sponsors again"
 description = "Sponsored sections in videos are annoying and take up a lot of time. Improve your YouTube watching experience, by filling in the youtube url and figure out what segments to skip."
@@ -10,8 +12,10 @@ article = "Check out [the original Rick and Morty Bot](https://huggingface.co/sp
 def pipeline(video_url):
     video_id = video_url.split("?v=")[-1]
     punctuated_text = punctuate(video_id)
+    sentences = re.split(r"[\.\!\?]\s", punctuated_text)
+    classification = predict_from_document(sentences)
     # return punctuated_text
-    return [{"start": "12:05", "end": "12:52"}]
+    return [{"start": "12:05", "end": "12:52", "classification": str(classification)}]
 
 
 # print(pipeline("VL5M5ZihJK4"))
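
Note: the new pipeline body calls re.split, but the hunks above do not add an import re to app.py, so calling pipeline would raise a NameError unless re is imported elsewhere in the file. The following is a minimal, self-contained sketch of the updated app.py under two assumptions that are not in this diff: that import re is present at module level, and that pipeline is exposed through a simple gr.Interface with a text input and JSON output.

import re

import gradio as gr

from functions.punctuation import punctuate
from functions.model_infer import predict_from_document


def pipeline(video_url):
    # Pull the video id out of a standard watch URL.
    video_id = video_url.split("?v=")[-1]
    # Punctuate the raw transcript, then split it into sentences.
    punctuated_text = punctuate(video_id)
    sentences = re.split(r"[\.\!\?]\s", punctuated_text)
    # Classify each sentence as sponsored or not.
    classification = predict_from_document(sentences)
    # Placeholder timestamps from the commit, with the classification attached for inspection.
    return [{"start": "12:05", "end": "12:52", "classification": str(classification)}]


# Assumed wiring, not shown in this commit.
demo = gr.Interface(fn=pipeline, inputs="text", outputs="json")
demo.launch()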
functions/model_infer.py ADDED
@@ -0,0 +1,39 @@
+from tensorflow.keras.preprocessing.sequence import pad_sequences
+import tensorflow as tf
+import os
+import requests
+from keras.models import load_model
+
+headers = {"Authorization": f"Bearer {os.environ['HF_Token']}"}
+
+model = load_model("RNN_model.keras")
+
+
+def query_embeddings(texts):
+    payload = {"inputs": texts, "options": {"wait_for_model": True}}
+
+    model_id = "sentence-transformers/sentence-t5-base"
+    API_URL = (
+        f"https://api-inference.huggingface.co/pipeline/feature-extraction/{model_id}"
+    )
+    response = requests.post(API_URL, headers=headers, json=payload)
+    return response.json()
+
+
+def preprocess(sentences):
+    max_len = 1682
+    embeddings = query_embeddings(sentences)
+
+    if len(sentences) > max_len:
+        X = embeddings[:max_len]
+    else:
+        X = embeddings
+    X_padded = pad_sequences([X], maxlen=max_len, dtype="float32", padding="post")
+    return X_padded
+
+
+def predict_from_document(sentences):
+    preprop = preprocess(sentences)
+    prediction = model.predict(preprop)
+    output = (prediction.flatten()[: len(sentences)] >= 0.5).astype(int)
+    return output
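
Taken together, the new module embeds each sentence remotely via the Hugging Face Inference API (feature extraction with sentence-transformers/sentence-t5-base, authenticated with the HF_Token secret), pads the embedding sequence to a fixed length of 1682, runs the bundled RNN_model.keras over it, and thresholds the per-position predictions at 0.5. A rough usage sketch, assuming the model file is present in the Space, HF_Token is set, and the API returns one embedding vector per input sentence:

# Hypothetical example, not part of the commit.
from functions.model_infer import predict_from_document

sentences = [
    "This video is sponsored by ExampleVPN.",       # hoped-for label: 1 (sponsored)
    "Now back to the actual topic of the video.",   # hoped-for label: 0
]

# Returns a numpy array with one 0/1 label per input sentence.
labels = predict_from_document(sentences)
print(labels)  # e.g. array([1, 0])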
requirements.txt CHANGED
@@ -1 +1,4 @@
 youtube_transcript_api
+tensorflow==2.16.1
+keras==3.3.3
+keras-nlp