IanRonk committed
Commit 7198a8e
1 Parent(s): 1ac0382

Add functions

Files changed (3)
  1. app.py +5 -1
  2. functions/model_infer.py +39 -0
  3. requirements.txt +3 -0
app.py CHANGED
@@ -1,6 +1,8 @@
 from os import pipe
 import gradio as gr
 from functions.punctuation import punctuate
+from functions.model_infer import predict_from_document
+
 
 title = "sponsoredBye - never listen to sponsors again"
 description = "Sponsored sections in videos are annoying and take up a lot of time. Improve your YouTube watching experience, by filling in the youtube url and figure out what segments to skip."
@@ -10,8 +12,10 @@ article = "Check out [the original Rick and Morty Bot](https://huggingface.co/sp
 def pipeline(video_url):
     video_id = video_url.split("?v=")[-1]
     punctuated_text = punctuate(video_id)
+    sentences = re.split(r"[\.\!\?]\s", punctuated_text)
+    classification = predict_from_document(sentences)
     # return punctuated_text
-    return [{"start": "12:05", "end": "12:52"}]
+    return [{"start": "12:05", "end": "12:52", "classification": str(classification)}]
 
 
 # print(pipeline("VL5M5ZihJK4"))
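
Note: the new pipeline body calls re.split, but the hunks above do not add an import re to app.py, so calling pipeline would raise a NameError unless re is imported elsewhere in the file. The following is a minimal, self-contained sketch of the updated app.py under two assumptions that are not in this diff: that import re is present at module level, and that pipeline is exposed through a simple gr.Interface with a text input and JSON output.

import re

import gradio as gr

from functions.punctuation import punctuate
from functions.model_infer import predict_from_document


def pipeline(video_url):
    # Pull the video id out of a standard watch URL.
    video_id = video_url.split("?v=")[-1]
    # Punctuate the raw transcript, then split it into sentences.
    punctuated_text = punctuate(video_id)
    sentences = re.split(r"[\.\!\?]\s", punctuated_text)
    # Classify each sentence as sponsored or not.
    classification = predict_from_document(sentences)
    # Placeholder timestamps from the commit, with the classification attached for inspection.
    return [{"start": "12:05", "end": "12:52", "classification": str(classification)}]


# Assumed wiring, not shown in this commit.
demo = gr.Interface(fn=pipeline, inputs="text", outputs="json")
demo.launch()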
functions/model_infer.py ADDED
@@ -0,0 +1,39 @@
+from tensorflow.keras.preprocessing.sequence import pad_sequences
+import tensorflow as tf
+import os
+import requests
+from keras.models import load_model
+
+headers = {"Authorization": f"Bearer {os.environ['HF_Token']}"}
+
+model = load_model("RNN_model.keras")
+
+
+def query_embeddings(texts):
+    payload = {"inputs": texts, "options": {"wait_for_model": True}}
+
+    model_id = "sentence-transformers/sentence-t5-base"
+    API_URL = (
+        f"https://api-inference.huggingface.co/pipeline/feature-extraction/{model_id}"
+    )
+    response = requests.post(API_URL, headers=headers, json=payload)
+    return response.json()
+
+
+def preprocess(sentences):
+    max_len = 1682
+    embeddings = query_embeddings(sentences)
+
+    if len(sentences) > max_len:
+        X = embeddings[:max_len]
+    else:
+        X = embeddings
+    X_padded = pad_sequences([X], maxlen=max_len, dtype="float32", padding="post")
+    return X_padded
+
+
+def predict_from_document(sentences):
+    preprop = preprocess(sentences)
+    prediction = model.predict(preprop)
+    output = (prediction.flatten()[: len(sentences)] >= 0.5).astype(int)
+    return output
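
Taken together, the new module embeds each sentence remotely via the Hugging Face Inference API (feature extraction with sentence-transformers/sentence-t5-base, authenticated with the HF_Token secret), pads the embedding sequence to a fixed length of 1682, runs the bundled RNN_model.keras over it, and thresholds the per-position predictions at 0.5. A rough usage sketch, assuming the model file is present in the Space, HF_Token is set, and the API returns one embedding vector per input sentence:

# Hypothetical example, not part of the commit.
from functions.model_infer import predict_from_document

sentences = [
    "This video is sponsored by ExampleVPN.",       # hoped-for label: 1 (sponsored)
    "Now back to the actual topic of the video.",   # hoped-for label: 0
]

# Returns a numpy array with one 0/1 label per input sentence.
labels = predict_from_document(sentences)
print(labels)  # e.g. array([1, 0])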
requirements.txt CHANGED
@@ -1 +1,4 @@
 youtube_transcript_api
+tensorflow==2.16.1
+keras==3.3.3
+keras-nlp