Update app.py
Browse files
app.py
CHANGED
@@ -81,7 +81,15 @@ def extract_frames(video_in, interval=24, output_format='.jpg'):
|
|
81 |
|
82 |
return frames
|
83 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
84 |
def process_image(image_in):
|
|
|
85 |
client = Client("https://vikhyatk-moondream1.hf.space/")
|
86 |
result = client.predict(
|
87 |
image_in, # filepath in 'image' Image component
|
@@ -91,6 +99,12 @@ def process_image(image_in):
|
|
91 |
)
|
92 |
print(result)
|
93 |
return result
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
|
95 |
def extract_audio(video_path):
|
96 |
video_clip = VideoFileClip(video_path)
|
|
|
81 |
|
82 |
return frames
|
83 |
|
84 |
+
from transformers import AutoModelForCausalLM, CodeGenTokenizerFast as Tokenizer
|
85 |
+
from PIL import Image
|
86 |
+
|
87 |
+
cap_model_id = "vikhyatk/moondream1"
|
88 |
+
cap_model = AutoModelForCausalLM.from_pretrained(cap_model_id, trust_remote_code=True)
|
89 |
+
cap_tokenizer = Tokenizer.from_pretrained(cap_model_id)
|
90 |
+
|
91 |
def process_image(image_in):
|
92 |
+
'''
|
93 |
client = Client("https://vikhyatk-moondream1.hf.space/")
|
94 |
result = client.predict(
|
95 |
image_in, # filepath in 'image' Image component
|
|
|
99 |
)
|
100 |
print(result)
|
101 |
return result
|
102 |
+
'''
|
103 |
+
image = Image.open(image_in)
|
104 |
+
enc_image = cap_model.encode_image(image)
|
105 |
+
result = cap_model.answer_question(enc_image, "Describe precisely the image in one sentence.", cap_tokenizer)
|
106 |
+
print(result)
|
107 |
+
return result
|
108 |
|
109 |
def extract_audio(video_path):
|
110 |
video_clip = VideoFileClip(video_path)
|