Spaces: VenkateshRoshan

VenkateshRoshan committed · Commit 5a809d1 · 1 Parent(s): 9c621dd

Files updated

- .github/workflows/sync_to_hf.yml +21 -0
- config/__pycache__/config.cpython-310.pyc +0 -0
- config/config.py +2 -2
- dockerfile +20 -0
- gradioApp.py +37 -0
- models/__pycache__/model.cpython-310.pyc +0 -0
- models/model.py +1 -3
- requirements.txt +7 -0
- train.py +2 -2
.github/workflows/sync_to_hf.yml
ADDED
@@ -0,0 +1,21 @@
+name: Sync to Hugging Face hub
+on:
+  push:
+    branches: [main]
+
+  # to run this workflow manually from the Actions tab.
+  workflow_dispatch:
+
+jobs:
+  sync-to-hub:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+          lfs: true
+      - name: Push to hub
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        run: |
+          git push --force https://abven:${{ secrets.HF_TOKEN }}@huggingface.co/spaces/abven/ImageCaptionGenerator main
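The workflow above force-pushes main to the abven/ImageCaptionGenerator Space using an HF_TOKEN repository secret. A quick local sanity check of that token and target Space, as a sketch only, assuming huggingface_hub is installed and HF_TOKEN is exported in the shell:

import os
from huggingface_hub import HfApi

# Sketch: confirm the token resolves to an account and the target Space exists.
# Assumes `pip install huggingface_hub` and an HF_TOKEN environment variable.
api = HfApi(token=os.environ["HF_TOKEN"])
print(api.whoami()["name"])                               # account the token belongs to
print(api.space_info("abven/ImageCaptionGenerator").id)   # raises if the Space is missing or inaccessible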
config/__pycache__/config.cpython-310.pyc
CHANGED
Binary files a/config/__pycache__/config.cpython-310.pyc and b/config/__pycache__/config.cpython-310.pyc differ
config/config.py
CHANGED
@@ -4,8 +4,8 @@ class Config:
     MAX_SEQ_LEN = 64
     VIT_MODEL = 'google/vit-base-patch16-224-in21k'
     GPT2_MODEL = 'gpt2'
-    LEARNING_RATE = 5e-5
-    EPOCHS =
+    LEARNING_RATE = 1e-4 #5e-5
+    EPOCHS = 30
     DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
     AWS_S3_BUCKET = 'your-s3-bucket-name'
     DATASET_PATH = '../Datasets/Flickr8K/'
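For reference, the hunk above (lines 4-11 of config.py) implies a Config class along these lines; the class header comes from the hunk context, while the torch import and anything outside the shown lines are assumptions:

import torch

class Config:
    # Values taken from the new side of the diff above.
    MAX_SEQ_LEN = 64
    VIT_MODEL = 'google/vit-base-patch16-224-in21k'
    GPT2_MODEL = 'gpt2'
    LEARNING_RATE = 1e-4  # raised from 5e-5 in this commit
    EPOCHS = 30           # set to 30 in this commit
    DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
    AWS_S3_BUCKET = 'your-s3-bucket-name'
    DATASET_PATH = '../Datasets/Flickr8K/'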
dockerfile
ADDED
@@ -0,0 +1,20 @@
+FROM python:3.10-slim
+
+WORKDIR /app
+
+COPY . .
+
+RUN apt-get update && apt-get install -y build-essential
+
+RUN pip install --upgrade pip
+
+RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124
+
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Dockerfile
+ENV FLASK_RUN_HOST=0.0.0.0
+
+EXPOSE 5000
+
+CMD ["python", "app.py"]
gradioApp.py
ADDED
@@ -0,0 +1,37 @@
+import gradio as gr
+from PIL import Image
+import io
+from infer import ImageCaptioningInference
+from models.model import ImageCaptioningModel
+import numpy as np
+
+# Initialize the model
+model_dir = 'model'
+model = ImageCaptioningModel()
+model.load(model_dir)
+inference_model = ImageCaptioningInference(model)
+
+def generate_caption(image):
+    if image is None:
+        return "No image provided."
+
+    try:
+        # Generate caption using the image path
+        generated_caption = inference_model.infer_image(image)
+        return generated_caption
+    except Exception as e:
+        return f"Error: {str(e)}"
+
+# Create Gradio interface
+iface = gr.Interface(
+    fn=generate_caption,
+    inputs=gr.Image(type="pil"),
+    outputs="text",
+    title="Image Captioning",
+    description="Upload an image or select one from your folder to generate a caption.",
+    examples=[["test_img.jpg"]]  # Add some example images if available
+)
+
+# Launch the app
+if __name__ == "__main__":
+    iface.launch()
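Because gr.Image(type="pil") hands generate_caption a PIL image, the function can also be smoke-tested outside Gradio. A minimal sketch, assuming a trained model directory model/ and a local test_img.jpg exist (the same assets the app itself expects):

from PIL import Image

# Importing gradioApp loads the model from 'model/' exactly as the Space does.
from gradioApp import generate_caption

img = Image.open("test_img.jpg").convert("RGB")
print(generate_caption(img))  # prints the generated caption, or an "Error: ..." string on failure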
models/__pycache__/model.cpython-310.pyc
CHANGED
Binary files a/models/__pycache__/model.cpython-310.pyc and b/models/__pycache__/model.cpython-310.pyc differ
models/model.py
CHANGED
@@ -1,7 +1,6 @@
 import torch
 from transformers import ViTModel, ViTFeatureExtractor, GPT2LMHeadModel, GPT2Tokenizer
 from config.config import Config
-from torchsummary import summary
 from torchvision import transforms
 
 class ImageCaptioningModel:
@@ -48,9 +47,8 @@ class ImageCaptioningModel:
     def save(self, path):
         """Save model to disk."""
         self.gpt2_model.save_pretrained(path)
-        self.tokenizer.save_pretrained(path)
 
     def load(self, path):
         """Load model from disk."""
         self.gpt2_model = GPT2LMHeadModel.from_pretrained(path).to(self.device)
-
+        # return self.gpt2_model
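With this change, save() persists only the GPT-2 weights (the tokenizer.save_pretrained call is removed) and load() restores only gpt2_model. A round-trip sketch of the behaviour implied by the hunk above; the path and the surrounding training step are assumptions:

from models.model import ImageCaptioningModel

model = ImageCaptioningModel()
# ... training happens elsewhere (see train.py) ...
model.save("model")   # writes gpt2_model via save_pretrained; the tokenizer is no longer saved here
model.load("model")   # reloads only gpt2_model onto self.device; nothing else is restored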
requirements.txt
ADDED
@@ -0,0 +1,7 @@
+numpy
+PILLOW
+transformers
+mlflow
+pandas
+torchvision
+flask
train.py
CHANGED
@@ -12,7 +12,6 @@ import mlflow.pytorch
 
 # TODO : Implementing Weights and Biases to for project tracking and evaluation and TODO : DVC also for data versioning
 
-
 def train_model(model,dataLoader, optimizer, loss_fn):
 
     with mlflow.start_run():
@@ -24,7 +23,8 @@ def train_model(model,dataLoader, optimizer, loss_fn):
         })
 
         model.gpt2_model.train()
-        for epoch in range(Config.EPOCHS):
+        for epoch in tqdm(range(Config.EPOCHS)):
+            print(f'Epoch {epoch + 1}/{Config.EPOCHS}')
             epoch_loss = 0
             for batch_idx, (images, captions) in tqdm(enumerate(dataLoader)):
                 print(f'\rBatch {batch_idx + 1}/{len(dataLoader)} , Loss : {epoch_loss/(batch_idx+1):.4f}\t', end='')
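For context, a self-contained sketch of how the loop shown above could continue; the forward pass, the loss_fn call signature, the logged parameter names and the per-epoch metric are assumptions, not part of this diff:

import mlflow
from tqdm import tqdm
from config.config import Config

def train_model(model, dataLoader, optimizer, loss_fn):
    with mlflow.start_run():
        # Parameter names here are assumed; the diff only shows the closing "})".
        mlflow.log_params({
            'learning_rate': Config.LEARNING_RATE,
            'epochs': Config.EPOCHS,
        })

        model.gpt2_model.train()
        for epoch in tqdm(range(Config.EPOCHS)):
            print(f'Epoch {epoch + 1}/{Config.EPOCHS}')
            epoch_loss = 0
            for batch_idx, (images, captions) in tqdm(enumerate(dataLoader)):
                print(f'\rBatch {batch_idx + 1}/{len(dataLoader)} , Loss : {epoch_loss/(batch_idx+1):.4f}\t', end='')
                optimizer.zero_grad()
                loss = loss_fn(model, images, captions)  # assumed signature
                loss.backward()
                optimizer.step()
                epoch_loss += loss.item()
            # Assumed metric name; one value logged per epoch.
            mlflow.log_metric('train_loss', epoch_loss / len(dataLoader), step=epoch)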