bigmed@bigmed committed on
Commit f9fa815 · 1 Parent(s): ffb81ab
fixed clip import and deleted old files
- MED_VQA_Huggyface_Gradio.py +1 -1
- app.py +0 -46
- cats.jpg +0 -0
- flagged/image/tmp6px7agq4.jpg +0 -0
- flagged/log.csv +0 -2
MED_VQA_Huggyface_Gradio.py
CHANGED
@@ -5,7 +5,7 @@ from transformers import ViltProcessor, ViltForQuestionAnswering
 import torch
 import torch.nn as nn
 from transformers import CLIPTokenizer
-
+import clip
 from Transformers_for_Caption import Transformer_Caption
 import numpy as np
 import torchvision.transforms as transforms
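The one-line fix above replaces a blank line with import clip (the OpenAI CLIP package), matching the commit message. For context, a minimal sketch of how that import is typically used to obtain an image encoder and its preprocessing transform; the "ViT-B/32" checkpoint name and the image path are assumptions for illustration, not taken from this Space:

import clip
import torch
from PIL import Image

# Assumed usage sketch: load a CLIP backbone plus its preprocessing transform.
# "ViT-B/32" is an assumed checkpoint; the Space may load a different one.
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)

# Encode one image (path is hypothetical).
image = preprocess(Image.open("example.jpg")).unsqueeze(0).to(device)
with torch.no_grad():
    image_features = model.encode_image(image)  # shape (1, 512) for ViT-B/32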
app.py
DELETED
@@ -1,46 +0,0 @@
-##### VQA MED Demo
-
-import gradio as gr
-from transformers import ViltProcessor, ViltForQuestionAnswering
-import torch
-
-torch.hub.download_url_to_file('http://images.cocodataset.org/val2017/000000039769.jpg', 'cats.jpg')
-
-processor = ViltProcessor.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
-model = ViltForQuestionAnswering.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
-
-
-def answer_question(image, text):
-    encoding = processor(image, text, return_tensors="pt")
-
-    # forward pass
-    with torch.no_grad():
-        outputs = model(**encoding)
-
-    logits = outputs.logits
-    idx = logits.argmax(-1).item()
-    predicted_answer = model.config.id2label[idx]
-
-    return predicted_answer
-
-
-image = gr.Image(type="pil")
-question = gr.Textbox(label="Question")
-answer = gr.Textbox(label="Predicted answer")
-examples = [["cats.jpg", "How many cats are there?"]]
-
-title = "Interactive Visual Question Answering demo (BigMed@ai: Artificial Intelligence for Large-Scale Medical Image Analysis)"
-description = "<div style='display: flex;align-items: center;justify-content: space-between;'><p style='width:60vw;'>Gradio Demo for VQA medical model trained on PathVQA dataset, To use it, upload your image and type a question and click 'submit', or click one of the examples to load them.</p><a href='https://github.com/dandelin/ViLT' target='_blank' class='link'><img src='file/GitHub.png' style='justify-self:margin-top:0.5em;center; width:calc(200px + 5vw);'></a></div>"
-
-### link to paper and github code
-article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2102.03334' target='_blank'>BigMed@ai</a> | <a href='https://github.com/dandelin/ViLT' target='_blank'>Github Repo</a></p>"
-
-interface = gr.Interface(fn=answer_question,
-                         inputs=[image, question],
-                         outputs=answer,
-                         examples=examples,
-                         title=title,
-                         description=description,
-                         article=article,
-                         )
-interface.launch(debug=True, enable_queue=True)
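The deleted app.py wrapped a stock ViLT VQA checkpoint in a Gradio interface. Its inference path can be exercised without Gradio; a minimal sketch reusing the same checkpoint, where the local image path is an assumption:

from PIL import Image
import torch
from transformers import ViltProcessor, ViltForQuestionAnswering

# Same checkpoint the deleted demo used.
processor = ViltProcessor.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
model = ViltForQuestionAnswering.from_pretrained("dandelin/vilt-b32-finetuned-vqa")

image = Image.open("cats.jpg")  # assumed local copy of the COCO example image
encoding = processor(image, "How many cats are there?", return_tensors="pt")
with torch.no_grad():
    logits = model(**encoding).logits
print(model.config.id2label[logits.argmax(-1).item()])  # "2" per the deleted flagged/log.csv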
cats.jpg
DELETED
Binary file (173 kB)
flagged/image/tmp6px7agq4.jpg
DELETED
Binary file (173 kB)
flagged/log.csv
DELETED
@@ -1,2 +0,0 @@
-image,Question,Predicted answer,flag,username,timestamp
-D:\2023\BigMed_Demos\VQA_Demo\flagged\image\tmp6px7agq4.jpg,How many cats are there?,2,,,2022-12-26 01:49:33.791750