Spaces:

dokster
/

vqa-analysis

Sleeping

dokster committed on Jul 2, 2023

Commit

cbafdbe

1 Parent(s): 9712afd

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -74,42 +74,44 @@ def main():
     question = st.text_input("❔ Enter question prompt: ", "")
-    tfile = tempfile.NamedTemporaryFile(delete=False)
-    tfile.write(uploaded_file.read())
-    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
-    val_embeddings = []
-    val_captions = []
-    result = ''
-    text = f'Question: {question}? Answer:'
-    #read video -> get_ans
-    video = read_video(tfile.name, transform=None, frames_num=4)
-    if len(video) > 0:
-        i = image_grid(video, 2, 2)
-        image = preprocess(i).unsqueeze(0).to(device)
-        with torch.no_grad():
-            prefix = clip_model.encode_image(image).to(device, dtype=torch.float32)
-        val_embeddings.append(prefix)
-        val_captions.append(text)
-    answers = []
-    for i in tqdm(range(len(val_embeddings))):
-        emb = val_embeddings[i]
-        caption = val_captions[i]
-        ans = get_ans(model, tokenizer, emb, prefix_length, caption)
-        answers.append(ans['answer'])
-    result = answers[0].split(' A: ')[0]
-    res = st.text_input('✅ Answer to the question', result, disabled=False)
 if __name__ == '__main__':
     main()

     question = st.text_input("❔ Enter question prompt: ", "")
+    try:
+        tfile = tempfile.NamedTemporaryFile(delete=False)
+        tfile.write(uploaded_file.read())
+        device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
+        val_embeddings = []
+        val_captions = []
+        result = ''
+        text = f'Question: {question}? Answer:'
+        #read video -> get_ans
+        video = read_video(tfile.name, transform=None, frames_num=4)
+        if len(video) > 0:
+            i = image_grid(video, 2, 2)
+            image = preprocess(i).unsqueeze(0).to(device)
+            with torch.no_grad():
+                prefix = clip_model.encode_image(image).to(device, dtype=torch.float32)
+            val_embeddings.append(prefix)
+            val_captions.append(text)
+        answers = []
+        for i in tqdm(range(len(val_embeddings))):
+            emb = val_embeddings[i]
+            caption = val_captions[i]
+            ans = get_ans(model, tokenizer, emb, prefix_length, caption)
+            answers.append(ans['answer'])
+        result = answers[0].split(' A: ')[0]
+        res = st.text_input('✅ Answer to the question', result, disabled=False)
+    except:
+        pass
 if __name__ == '__main__':
     main()