dokster committed on
Commit
cbafdbe
·
1 Parent(s): 9712afd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -26
app.py CHANGED
@@ -74,42 +74,44 @@ def main():
74
 
75
  question = st.text_input("❔ Enter question prompt: ", "")
76
 
77
-
78
- tfile = tempfile.NamedTemporaryFile(delete=False)
79
- tfile.write(uploaded_file.read())
80
 
81
- device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
82
- val_embeddings = []
83
- val_captions = []
84
- result = ''
85
- text = f'Question: {question}? Answer:'
86
 
87
- #read video -> get_ans
88
- video = read_video(tfile.name, transform=None, frames_num=4)
89
 
90
- if len(video) > 0:
91
- i = image_grid(video, 2, 2)
92
- image = preprocess(i).unsqueeze(0).to(device)
93
 
94
- with torch.no_grad():
95
- prefix = clip_model.encode_image(image).to(device, dtype=torch.float32)
96
 
97
- val_embeddings.append(prefix)
98
- val_captions.append(text)
99
 
100
- answers = []
101
 
102
- for i in tqdm(range(len(val_embeddings))):
103
- emb = val_embeddings[i]
104
- caption = val_captions[i]
105
 
106
- ans = get_ans(model, tokenizer, emb, prefix_length, caption)
107
- answers.append(ans['answer'])
108
 
109
- result = answers[0].split(' A: ')[0]
110
-
111
- res = st.text_input('βœ… Answer to the question', result, disabled=False)
112
 
 
 
113
 
114
  if __name__ == '__main__':
115
  main()
 
74
 
75
  question = st.text_input("❔ Enter question prompt: ", "")
76
 
77
+ try:
78
+ tfile = tempfile.NamedTemporaryFile(delete=False)
79
+ tfile.write(uploaded_file.read())
80
 
81
+ device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
82
+ val_embeddings = []
83
+ val_captions = []
84
+ result = ''
85
+ text = f'Question: {question}? Answer:'
86
 
87
+ #read video -> get_ans
88
+ video = read_video(tfile.name, transform=None, frames_num=4)
89
 
90
+ if len(video) > 0:
91
+ i = image_grid(video, 2, 2)
92
+ image = preprocess(i).unsqueeze(0).to(device)
93
 
94
+ with torch.no_grad():
95
+ prefix = clip_model.encode_image(image).to(device, dtype=torch.float32)
96
 
97
+ val_embeddings.append(prefix)
98
+ val_captions.append(text)
99
 
100
+ answers = []
101
 
102
+ for i in tqdm(range(len(val_embeddings))):
103
+ emb = val_embeddings[i]
104
+ caption = val_captions[i]
105
 
106
+ ans = get_ans(model, tokenizer, emb, prefix_length, caption)
107
+ answers.append(ans['answer'])
108
 
109
+ result = answers[0].split(' A: ')[0]
110
+
111
+ res = st.text_input('βœ… Answer to the question', result, disabled=False)
112
 
113
+ except:
114
+ pass
115
 
116
  if __name__ == '__main__':
117
  main()