vicgalle committed on
Commit
1b5849e
1 Parent(s): 6aba0d7

Add application file

Browse files
Files changed (1) hide show
  1. app.py +29 -2
app.py CHANGED
@@ -1,5 +1,6 @@
1
  import streamlit as st
2
  from PIL import Image
 
3
 
4
  st.title("Image to Text Converter")
5
 
@@ -7,15 +8,41 @@ st.title("Image to Text Converter")
7
  uploaded_image = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
8
 
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  if uploaded_image is not None:
11
  # Display the uploaded image
12
- image = Image.open(uploaded_image)
13
  st.image(image, caption='Uploaded Image.', use_column_width=True)
 
 
 
 
 
 
 
14
 
 
 
 
15
  # Extract text from the image
16
  st.write("Extracting text from the image...")
17
  # Display the extracted text
18
- st.text_area("Extracted Text", "desc", height=200)
19
 
20
 
21
 
 
1
  import streamlit as st
2
  from PIL import Image
3
+ from transformers import PaliGemmaForConditionalGeneration, PaliGemmaProcessor
4
 
5
  st.title("Image to Text Converter")
6
 
 
8
  uploaded_image = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
9
 
10
 
11
+ model = PaliGemmaForConditionalGeneration.from_pretrained("Fer14/paligemma_coffe_describer")
12
+ processor = PaliGemmaProcessor.from_pretrained("Fer14/paligemma_coffe_describer")
13
+
14
+ prompt = (
15
+ f"Generate a caption for the following coffee maker image. The caption has to be of the following structure:\n"
16
+ "\"A <color> <type>, <accessories>, <shape> shaped, with <screen> and <number> <b_color> butons\"\n\n"
17
+ "in which:\n"
18
+ "- color: red, black, blue...\n"
19
+ "- type: coffee machine, coffee maker, espresso coffee machine...\n"
20
+ "- accessories: a list of accessories like the ones described above\n"
21
+ "- shape: cubed, round...\n"
22
+ "- screen: screen, no screen.\n"
23
+ "- number: amount of buttons to add\n"
24
+ "- b_color: color of the buttons"
25
+ )
26
+
27
  if uploaded_image is not None:
28
  # Display the uploaded image
29
+ image = Image.open(uploaded_image).convert("RGB")
30
  st.image(image, caption='Uploaded Image.', use_column_width=True)
31
+
32
+ inputs = processor(
33
+ text=prompt,
34
+ images=image,
35
+ return_tensors="pt",
36
+ padding="longest",
37
+ )
38
 
39
+ output = model.generate(**inputs, max_length=1000)
40
+ out = processor.decode(output[0], skip_special_tokens=True)[len(prompt) :]
41
+
42
  # Extract text from the image
43
  st.write("Extracting text from the image...")
44
  # Display the extracted text
45
+ st.text_area("Coffe machine description", out, height=300)
46
 
47
 
48