datnguyentien204 commited on
Commit
2117908
·
verified ·
1 Parent(s): 8f0ab51

d16909ba7788f34f26090a924304028c807d03bbcb38c496b65d34ff2e560d85

Browse files
visual_genome.ipynb ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "initial_id",
7
+ "metadata": {
8
+ "collapsed": true
9
+ },
10
+ "outputs": [],
11
+ "source": [
12
+ "import matplotlib.pyplot as plt\n",
13
+ "from matplotlib.patches import Rectangle\n",
14
+ "from src import api as vg\n",
15
+ "from PIL import Image as PIL_Image\n",
16
+ "import requests\n",
17
+ "from StringIO import StringIO"
18
+ ]
19
+ }
20
+ ],
21
+ "metadata": {
22
+ "kernelspec": {
23
+ "display_name": "Python 3",
24
+ "language": "python",
25
+ "name": "python3"
26
+ },
27
+ "language_info": {
28
+ "codemirror_mode": {
29
+ "name": "ipython",
30
+ "version": 2
31
+ },
32
+ "file_extension": ".py",
33
+ "mimetype": "text/x-python",
34
+ "name": "python",
35
+ "nbconvert_exporter": "python",
36
+ "pygments_lexer": "ipython2",
37
+ "version": "2.7.6"
38
+ }
39
+ },
40
+ "nbformat": 4,
41
+ "nbformat_minor": 5
42
+ }
vqg_pred.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from PIL import Image
import requests
import torch
from torchvision import transforms
from torchvision.transforms.functional import InterpolationMode
# NOTE(review): FILTER_LZMA1 is never referenced below; presumably this import
# exists only to force the lzma module to load before the shim runs — confirm.
from lzma import FILTER_LZMA1

# Workaround for environments where CPython was built without the _lzma
# extension: fall back to the third-party backports.lzma package, pulling in
# both the public names and the private filter-property helpers.
try:
    from _lzma import *
    from _lzma import _encode_filter_properties, _decode_filter_properties
except ImportError:
    from backports.lzma import *
    from backports.lzma import _encode_filter_properties, _decode_filter_properties

# Prefer the GPU when CUDA is available; otherwise run on CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
16
+
17
+
18
def load_demo_image(image_size, device,
                    img_url='https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg'):
    """Download an image over HTTP and return it as a normalized tensor.

    Args:
        image_size: Target side length; the image is resized to
            (image_size, image_size) with bicubic interpolation.
        device: torch.device (or device string) the tensor is moved to.
        img_url: Source image URL. Defaults to the BLIP demo image, so
            existing two-argument callers are unaffected.

    Returns:
        A float tensor of shape (1, 3, image_size, image_size), normalized
        with the CLIP mean/std statistics.

    Raises:
        requests.HTTPError: if the server responds with an error status.
    """
    # Fail fast on HTTP errors instead of handing an error page to PIL,
    # which would otherwise surface as a confusing decode exception.
    response = requests.get(img_url, stream=True, timeout=30)
    response.raise_for_status()
    raw_image = Image.open(response.raw).convert('RGB')

    w, h = raw_image.size
    # NOTE(review): printing a PIL Image only shows its repr; this looks like
    # a quick size sanity-check (display() in a notebook?) — confirm intent.
    print(raw_image.resize((w // 5, h // 5)))

    transform = transforms.Compose([
        transforms.Resize((image_size, image_size), interpolation=InterpolationMode.BICUBIC),
        transforms.ToTensor(),
        # CLIP-style channel mean/std normalization.
        transforms.Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711))
    ])
    # Add the batch dimension and move to the requested device.
    image = transform(raw_image).unsqueeze(0).to(device)
    return image
weights/model_base_vqa_capfilt_large.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a7d546209f1ccfa8b3cd3a0138c53e0d1e95e4a4bc280bef8f67e20fe4925ae
3
+ size 1446244375