ritiksh committed
Commit 16e678c · 1 Parent(s): cef062a

Upload 2 files

Files changed (2)
  1. app.py +62 -0
  2. requirements.txt +10 -0
app.py ADDED
@@ -0,0 +1,62 @@
+ import time
+
+ import accelerate
+ import streamlit as st
+ import torch
+ import transformers
+ from PIL import Image  # needed for Image.open below; missing in the original
+ from transformers import AutoModelForCausalLM, AutoProcessor
+ # `pipe` was never defined; a diffusers text-to-image pipeline is assumed
+ # here, since diffusers appears in requirements.txt
+ from diffusers import StableDiffusionPipeline
+
+ # Quick sanity check of the installed library versions
+ print(accelerate.__version__)
+ print(transformers.__version__)
+
+ # Image Captioning
+
+ device = "cuda" if torch.cuda.is_available() else "cpu"  # use the GPU if one is available
+
+ t1 = time.time()  # starts the model-loading + inference timer reported below
+
+ checkpoint = "microsoft/git-base"
+ # For a language model we would load a tokenizer; here we load a processor that prepares images
+ processor = AutoProcessor.from_pretrained(checkpoint)
+ model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)
+
+ # Text-to-image pipeline used to redraw each caption (the checkpoint choice is an assumption)
+ pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5").to(device)
+
+ # Streamlit UI: caption the uploaded image, redraw the caption, and repeat
+ st.title("Flower Type Demo")
+ st.subheader("Upload an image and see how Chinese Whispers works")
+
+ upload_file = st.file_uploader('Upload an Image')
+
+ if upload_file:
+     image = Image.open(upload_file)
+
+     inputs = processor(images=image, return_tensors="pt").to(device)
+     pixel_values = inputs.pixel_values
+
+     generated_ids = model.generate(pixel_values=pixel_values, max_length=50)
+     generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+
+     for i in range(10):
+         st.write('New caption is:')
+         st.write(generated_caption)
+
+         # Generate a new image from the current caption
+         image = pipe(generated_caption).images[0]
+
+         print("Model loading + inference time = " + str(time.time() - t1) + " seconds")
+
+         st.write("Showing the image")
+         st.image(image, caption=generated_caption, width=None, use_column_width=None, clamp=False, channels='RGB', output_format='auto')
+
+         # Caption the newly generated image for the next round
+         inputs = processor(images=image, return_tensors="pt").to(device)
+         pixel_values = inputs.pixel_values
+         generated_ids = model.generate(pixel_values=pixel_values, max_length=50)
+         generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
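
The captioning half of app.py can be sanity-checked outside Streamlit. A minimal sketch, where "flower.jpg" stands in for any local test image (the filename is hypothetical):

import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor

device = "cuda" if torch.cuda.is_available() else "cpu"
processor = AutoProcessor.from_pretrained("microsoft/git-base")
model = AutoModelForCausalLM.from_pretrained("microsoft/git-base").to(device)

# "flower.jpg" is a placeholder for a local test image
pixel_values = processor(images=Image.open("flower.jpg"), return_tensors="pt").pixel_values.to(device)
generated_ids = model.generate(pixel_values=pixel_values, max_length=50)
print(processor.batch_decode(generated_ids, skip_special_tokens=True)[0])
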
requirements.txt ADDED
@@ -0,0 +1,10 @@
+ transformers
+ datasets
+ evaluate
+ jiwer
+ accelerate
+ diffusers
+ scipy
+ torch
+ Pillow
+ streamlit
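
For local testing, the app starts with Streamlit's standard runner, streamlit run app.py; on a Hugging Face Space with the Streamlit SDK it launches automatically.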