VenkateshRoshan committed on
Commit 5a809d1 · 1 Parent(s): 9c621dd

Files updated

.github/workflows/sync_to_hf.yml ADDED
@@ -0,0 +1,21 @@
+ name: Sync to Hugging Face hub
+ on:
+   push:
+     branches: [main]
+
+   # Allows running this workflow manually from the Actions tab.
+   workflow_dispatch:
+
+ jobs:
+   sync-to-hub:
+     runs-on: ubuntu-latest
+     steps:
+       - uses: actions/checkout@v3
+         with:
+           fetch-depth: 0
+           lfs: true
+       - name: Push to hub
+         env:
+           HF_TOKEN: ${{ secrets.HF_TOKEN }}
+         run: |
+           git push --force https://abven:${{ secrets.HF_TOKEN }}@huggingface.co/spaces/abven/ImageCaptionGenerator main
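This workflow mirrors the repository to the abven/ImageCaptionGenerator Space by force-pushing main with the HF_TOKEN secret on every push to main (or on a manual dispatch). For a one-off sync from a local checkout, a rough equivalent using the huggingface_hub client might look like the sketch below; huggingface_hub is not listed in requirements.txt, so treat it as an extra, hypothetical dependency.

import os
from huggingface_hub import HfApi

# One-off mirror of the repo contents to the Space; assumes `pip install huggingface_hub`
# and an HF_TOKEN environment variable with write access to the Space.
api = HfApi(token=os.environ["HF_TOKEN"])
api.upload_folder(
    folder_path=".",                         # repository root
    repo_id="abven/ImageCaptionGenerator",   # the Space targeted by the workflow above
    repo_type="space",
    commit_message="Manual sync of the repository contents",
)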
config/__pycache__/config.cpython-310.pyc CHANGED
Binary files a/config/__pycache__/config.cpython-310.pyc and b/config/__pycache__/config.cpython-310.pyc differ
 
config/config.py CHANGED
@@ -4,8 +4,8 @@ class Config:
      MAX_SEQ_LEN = 64
      VIT_MODEL = 'google/vit-base-patch16-224-in21k'
      GPT2_MODEL = 'gpt2'
-     LEARNING_RATE = 5e-5
-     EPOCHS = 10
+     LEARNING_RATE = 1e-4 #5e-5
+     EPOCHS = 30
      DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
      AWS_S3_BUCKET = 'your-s3-bucket-name'
      DATASET_PATH = '../Datasets/Flickr8K/'
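With this change the learning rate goes from 5e-5 to 1e-4 and the epoch budget from 10 to 30. A minimal sketch of how these Config values are typically consumed (the AdamW choice is an assumption; the optimizer setup itself is not part of this diff):

import torch
from config.config import Config
from models.model import ImageCaptioningModel

model = ImageCaptioningModel()
# Assumed optimizer setup; only the hyperparameters come from this diff.
optimizer = torch.optim.AdamW(model.gpt2_model.parameters(), lr=Config.LEARNING_RATE)

for epoch in range(Config.EPOCHS):   # now 30 epochs instead of 10
    pass                             # forward/backward/step as in train.py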
dockerfile ADDED
@@ -0,0 +1,20 @@
+ FROM python:3.10-slim
+
+ WORKDIR /app
+
+ COPY . .
+
+ RUN apt-get update && apt-get install -y build-essential
+
+ RUN pip install --upgrade pip
+
+ RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124
+
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Dockerfile
+ ENV FLASK_RUN_HOST=0.0.0.0
+
+ EXPOSE 5000
+
+ CMD ["python", "app.py"]
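The image installs CUDA 12.4 PyTorch wheels, exposes port 5000, and starts app.py, which is not part of this commit. Purely as an illustration of what the ENV/EXPOSE/CMD lines expect, a hypothetical minimal app.py could look like this (endpoint name and payload are assumptions):

import io
from flask import Flask, request, jsonify
from PIL import Image
from infer import ImageCaptioningInference
from models.model import ImageCaptioningModel

app = Flask(__name__)
model = ImageCaptioningModel()
model.load('model')                      # same saved-model directory as gradioApp.py
inference_model = ImageCaptioningInference(model)

@app.route('/caption', methods=['POST'])
def caption():
    # Hypothetical endpoint: accepts a multipart image upload, returns the caption as JSON.
    img = Image.open(io.BytesIO(request.files['image'].read())).convert('RGB')
    return jsonify({'caption': inference_model.infer_image(img)})

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)   # matches EXPOSE 5000 in the Dockerfile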
gradioApp.py ADDED
@@ -0,0 +1,37 @@
+ import gradio as gr
+ from PIL import Image
+ import io
+ from infer import ImageCaptioningInference
+ from models.model import ImageCaptioningModel
+ import numpy as np
+
+ # Initialize the model
+ model_dir = 'model'
+ model = ImageCaptioningModel()
+ model.load(model_dir)
+ inference_model = ImageCaptioningInference(model)
+
+ def generate_caption(image):
+     if image is None:
+         return "No image provided."
+
+     try:
+         # Generate a caption from the uploaded PIL image
+         generated_caption = inference_model.infer_image(image)
+         return generated_caption
+     except Exception as e:
+         return f"Error: {str(e)}"
+
+ # Create Gradio interface
+ iface = gr.Interface(
+     fn=generate_caption,
+     inputs=gr.Image(type="pil"),
+     outputs="text",
+     title="Image Captioning",
+     description="Upload an image or select one from your folder to generate a caption.",
+     examples=[["test_img.jpg"]]  # Add some example images if available
+ )
+
+ # Launch the app
+ if __name__ == "__main__":
+     iface.launch()
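The handler receives a PIL image from the Gradio widget, delegates to ImageCaptioningInference.infer_image, and returns either the caption or an "Error: ..." string. A quick local smoke test, assuming test_img.jpg (the example image listed above) is present in the repo root:

from PIL import Image
from gradioApp import generate_caption   # importing gradioApp loads the saved model as a side effect

img = Image.open("test_img.jpg").convert("RGB")
print(generate_caption(img))             # caption text, or "Error: ..." if inference fails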
models/__pycache__/model.cpython-310.pyc CHANGED
Binary files a/models/__pycache__/model.cpython-310.pyc and b/models/__pycache__/model.cpython-310.pyc differ
 
models/model.py CHANGED
@@ -1,7 +1,6 @@
  import torch
  from transformers import ViTModel, ViTFeatureExtractor, GPT2LMHeadModel, GPT2Tokenizer
  from config.config import Config
- from torchsummary import summary
  from torchvision import transforms

  class ImageCaptioningModel:
@@ -48,9 +47,8 @@ class ImageCaptioningModel:
      def save(self, path):
          """Save model to disk."""
          self.gpt2_model.save_pretrained(path)
-         self.tokenizer.save_pretrained(path)

      def load(self, path):
          """Load model from disk."""
          self.gpt2_model = GPT2LMHeadModel.from_pretrained(path).to(self.device)
-         self.tokenizer = GPT2Tokenizer.from_pretrained(path).to(self.device)
+         # return self.gpt2_model
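The deleted load() line called .to(self.device) on a GPT2Tokenizer, which fails because tokenizers are plain Python objects rather than torch modules; after this change save()/load() handle only the GPT-2 weights, and the model relies on the tokenizer created in __init__ (not shown in this hunk). If the tokenizer were to be round-tripped with the weights, a sketch without the .to() call could look like this (an alternative, not the committed behaviour; methods sit inside ImageCaptioningModel):

    def save(self, path):
        """Save GPT-2 weights and tokenizer to disk."""
        self.gpt2_model.save_pretrained(path)
        self.tokenizer.save_pretrained(path)

    def load(self, path):
        """Load GPT-2 weights and tokenizer from disk."""
        self.gpt2_model = GPT2LMHeadModel.from_pretrained(path).to(self.device)
        self.tokenizer = GPT2Tokenizer.from_pretrained(path)   # tokenizers have no .to(device)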
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ numpy
+ PILLOW
+ transformers
+ mlflow
+ pandas
+ torchvision
+ flask
train.py CHANGED
@@ -12,7 +12,6 @@ import mlflow.pytorch

  # TODO: Implement Weights and Biases for project tracking and evaluation; TODO: DVC for data versioning

-
  def train_model(model,dataLoader, optimizer, loss_fn):

      with mlflow.start_run():
@@ -24,7 +23,8 @@ def train_model(model,dataLoader, optimizer, loss_fn):
          })

          model.gpt2_model.train()
-         for epoch in range(Config.EPOCHS):
+         for epoch in tqdm(range(Config.EPOCHS)):
+             print(f'Epoch {epoch + 1}/{Config.EPOCHS}')
              epoch_loss = 0
              for batch_idx, (images, captions) in tqdm(enumerate(dataLoader)):
                  print(f'\rBatch {batch_idx + 1}/{len(dataLoader)} , Loss : {epoch_loss/(batch_idx+1):.4f}\t', end='')
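The epoch loop now gets a tqdm progress bar and a per-epoch header line. The forward pass, loss computation and optimizer step sit outside this hunk, so the following is only a hedged sketch of one full iteration inside train_model (model, dataLoader, optimizer and loss_fn are the function's own arguments; the exact calls are assumptions):

for epoch in tqdm(range(Config.EPOCHS)):
    print(f'Epoch {epoch + 1}/{Config.EPOCHS}')
    epoch_loss = 0
    for batch_idx, (images, captions) in tqdm(enumerate(dataLoader)):
        optimizer.zero_grad()
        loss = loss_fn(model, images, captions)   # assumed: loss_fn wraps the model's forward pass
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        print(f'\rBatch {batch_idx + 1}/{len(dataLoader)} , Loss : {epoch_loss/(batch_idx+1):.4f}\t', end='')
    mlflow.log_metric("epoch_loss", epoch_loss / len(dataLoader), step=epoch)   # assumed metric name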