NourFakih committed on
Commit
44f5cce
·
verified ·
1 Parent(s): 7399f80

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +98 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import gradio as gr
3
+ from transformers import AutoTokenizer, ViTImageProcessor, VisionEncoderDecoderModel
4
+ import zipfile
5
+ import os
6
+ import csv
7
+ from PIL import Image
8
+
9
# All tensors and the model are placed on this device.
device = 'cpu'

# Single hub repository that supplies the model weights, the image
# processor configuration and the tokenizer.
MODEL_ID = "NourFakih/Vit-GPT2-COCO2017Flickr-01"

# Load the pretrained model, feature extractor, and tokenizer once at import
# time so every request reuses the same objects.
model = VisionEncoderDecoderModel.from_pretrained(MODEL_ID)
model = model.to(device)
feature_extractor = ViTImageProcessor.from_pretrained(MODEL_ID)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
15
+
16
def predict(image, max_length=64, num_beams=4):
    """Generate a caption for a single image.

    Args:
        image: Image object exposing ``convert('RGB')`` (the caller in this
            file passes the result of ``PIL.Image.open``).
        max_length: Forwarded to ``model.generate`` as ``max_length``.
        num_beams: Forwarded to ``model.generate`` as ``num_beams``.

    Returns:
        The first generated sequence decoded to text with special tokens
        removed.
    """
    # Normalise the colour mode before handing the image to the processor.
    rgb_image = image.convert('RGB')
    encoded = feature_extractor(images=rgb_image, return_tensors="pt")
    pixels = encoded.pixel_values.to(device)

    # Only the first returned sequence is used.
    generated = model.generate(pixels, max_length=max_length, num_beams=num_beams)
    first_sequence = generated[0]

    return tokenizer.decode(first_sequence, skip_special_tokens=True)
27
+
28
def process_zip_file(zip_file_path):
    """Caption every readable image in a zip archive and write a CSV report.

    The archive is unpacked into a fresh temporary directory that is deleted
    before the function returns, so files left behind by an earlier call can
    never be captioned again as part of the current one. Each extracted file
    is opened with PIL and passed to ``predict``; any file that cannot be
    opened or captioned is reported on stdout and skipped.

    Args:
        zip_file_path: Path of the zip archive to process.

    Returns:
        The path of the CSV file that was written (``'image_captions.csv'``,
        relative to the current working directory). It contains an
        ``Image Name``/``Caption`` header followed by one row per captioned
        image, where the image name is the file's base name.

    Raises:
        zipfile.BadZipFile: If ``zip_file_path`` is not a valid zip archive.
    """
    # Local import keeps this block self-contained; nothing else needs it.
    import tempfile

    captions = []
    with tempfile.TemporaryDirectory(prefix='extracted_images_') as extract_dir:
        with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
            zip_ref.extractall(extract_dir)

        for root, dirs, files in os.walk(extract_dir):
            # Sorting in place makes the traversal, and therefore the CSV row
            # order, deterministic across platforms.
            dirs.sort()
            for file in sorted(files):
                file_path = os.path.join(root, file)
                try:
                    # The image must stay open while it is being captioned.
                    with Image.open(file_path) as img:
                        caption = predict(img)
                except Exception as e:
                    # Best effort: non-image entries and failed captions are
                    # skipped rather than aborting the whole archive.
                    print(f"Skipping file {file}: {e}")
                else:
                    captions.append((file, caption))

    # Save the results to a CSV file. UTF-8 is set explicitly so non-ASCII
    # file names do not depend on the platform's default encoding.
    csv_file_path = 'image_captions.csv'
    with open(csv_file_path, mode='w', newline='', encoding='utf-8') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(['Image Name', 'Caption'])
        writer.writerows(captions)

    return csv_file_path
57
+
58
def gr_process_zip(zip_file):
    """Gradio click handler: caption the uploaded archive and return the CSV.

    Args:
        zip_file: Value delivered by the upload component. It may be an
            object whose ``name`` attribute holds the uploaded file's path,
            a plain path string, or ``None`` when the button is clicked
            before anything has been uploaded.

    Returns:
        The CSV path returned by ``process_zip_file``, or ``None`` when no
        file was supplied.
    """
    # Nothing uploaded yet: return an empty result instead of crashing on
    # ``None.name``.
    if zip_file is None:
        return None

    # Use ``.name`` when present; otherwise treat the value itself as the path.
    zip_file_path = getattr(zip_file, 'name', zip_file)
    return process_zip_file(zip_file_path)
61
+
62
# Custom stylesheet handed to the Blocks app below.
css = '''
h1#title {
text-align: center;
}
h3#header {
text-align: center;
}
img#overview {
max-width: 800px;
max-height: 600px;
}
img#style-image {
max-width: 1000px;
max-height: 600px;
}
.gr-image {
max-width: 150px; /* Set a small box for the image */
max-height: 150px;
}
'''

demo = gr.Blocks(css=css)

with demo:
    gr.Markdown('''<h1 id="title">Image Caption 🖼️</h1>''')
    gr.Markdown('''Made by : No. Fa.''')

    with gr.Row():
        with gr.Column(scale=1):
            # `type` is left at the component default on purpose: the explicit
            # value "file" is only accepted by older Gradio releases and makes
            # newer ones fail at construction time, and requirements.txt does
            # not pin a Gradio version.
            input_zip = gr.File(label="Upload your Zip File")
        with gr.Column(scale=3):
            output_file = gr.File(label="Download Caption File")

    # Clicking the button runs the captioning handler on the uploaded archive
    # and offers the resulting CSV for download.
    btn = gr.Button("Generate Captions")
    btn.click(fn=gr_process_zip, inputs=input_zip, outputs=output_file)

demo.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ torch
2
+ transformers
3
+ gradio
4
+ pillow