adil9858 committed
Commit 76c5b28 · verified · 1 Parent(s): c6425bb

Update app.py

Files changed (1)
  1. app.py +69 -71
app.py CHANGED
@@ -1,71 +1,69 @@
- import streamlit as st
- from transformers import AutoProcessor, AutoModelForCausalLM
- from PIL import Image
- import torch
- import os
-
- # Load the Florence model and processor
- @st.cache_resource
- def load_model():
-     model_id = 'microsoft/Florence-2-large'
-     model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True, torch_dtype='auto').eval().cuda()
-     processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
-     return model, processor
-
- model, processor = load_model()
-
- # Function to run the model
- def run_example(task_prompt, image, text_input=None):
-     if text_input is None:
-         prompt = task_prompt
-     else:
-         prompt = task_prompt + text_input
-     inputs = processor(text=prompt, images=image, return_tensors="pt").to('cuda', torch.float16)
-     generated_ids = model.generate(
-         input_ids=inputs["input_ids"].cuda(),
-         pixel_values=inputs["pixel_values"].cuda(),
-         max_new_tokens=1024,
-         early_stopping=False,
-         do_sample=False,
-         num_beams=3,
-     )
-     generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
-     parsed_answer = processor.post_process_generation(
-         generated_text,
-         task=task_prompt,
-         image_size=(image.width, image.height)
-     )
-     return parsed_answer
-
- # Streamlit UI
- st.title("Microsoft Florence Image Captioning")
-
- # File uploader
- uploaded_file = st.file_uploader("Upload an image (PNG or JPG)", type=["png", "jpg", "jpeg"])
-
- if uploaded_file is not None:
-     # Convert and display the image
-     image = Image.open(uploaded_file).convert("RGB")
-     st.image(image, caption="Uploaded Image", use_column_width=True)
-
-     # Generate captions
-     st.subheader("Generated Captions")
-
-     with st.spinner("Generating caption..."):
-         caption = run_example('<CAPTION>', image)
-         detailed_caption = run_example('<DETAILED_CAPTION>', image)
-         more_detailed_caption = run_example('<MORE_DETAILED_CAPTION>', image)
-
-     st.write("**Caption:**", caption)
-     st.write("**Detailed Caption:**", detailed_caption)
-     st.write("**More Detailed Caption:**", more_detailed_caption)
-
-     # Option to save the output
-     if st.button("Save Captions"):
-         output_path = "captions.txt"
-         with open(output_path, "w") as file:
-             file.write(f"Caption: {caption}\n")
-             file.write(f"Detailed Caption: {detailed_caption}\n")
-             file.write(f"More Detailed Caption: {more_detailed_caption}\n")
-         st.success(f"Captions saved to {output_path}!")
-
 
+ import streamlit as st
+ from transformers import AutoProcessor, AutoModelForCausalLM
+ from PIL import Image
+ import torch
+
+ # Load the Florence model and processor
+ @st.cache_resource
+ def load_model():
+     model_id = 'microsoft/Florence-2-large'
+     model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True, torch_dtype='auto').eval()
+     processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
+     return model, processor
+
+ model, processor = load_model()
+
+ # Function to run the model
+ def run_example(task_prompt, image, text_input=None):
+     if text_input is None:
+         prompt = task_prompt
+     else:
+         prompt = task_prompt + text_input
+     inputs = processor(text=prompt, images=image, return_tensors="pt").to(torch.float32)  # Ensure CPU compatibility
+     generated_ids = model.generate(
+         input_ids=inputs["input_ids"],
+         pixel_values=inputs["pixel_values"],
+         max_new_tokens=1024,
+         early_stopping=False,
+         do_sample=False,
+         num_beams=3,
+     )
+     generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
+     parsed_answer = processor.post_process_generation(
+         generated_text,
+         task=task_prompt,
+         image_size=(image.width, image.height)
+     )
+     return parsed_answer
+
+ # Streamlit UI
+ st.title("Microsoft Florence Image Captioning (CPU)")
+
+ # File uploader
+ uploaded_file = st.file_uploader("Upload an image (PNG or JPG)", type=["png", "jpg", "jpeg"])
+
+ if uploaded_file is not None:
+     # Convert and display the image
+     image = Image.open(uploaded_file).convert("RGB")
+     st.image(image, caption="Uploaded Image", use_column_width=True)
+
+     # Generate captions
+     st.subheader("Generated Captions")
+
+     with st.spinner("Generating caption..."):
+         caption = run_example('<CAPTION>', image)
+         detailed_caption = run_example('<DETAILED_CAPTION>', image)
+         more_detailed_caption = run_example('<MORE_DETAILED_CAPTION>', image)
+
+     st.write("**Caption:**", caption)
+     st.write("**Detailed Caption:**", detailed_caption)
+     st.write("**More Detailed Caption:**", more_detailed_caption)
+
+     # Option to save the output
+     if st.button("Save Captions"):
+         output_path = "captions.txt"
+         with open(output_path, "w") as file:
+             file.write(f"Caption: {caption}\n")
+             file.write(f"Detailed Caption: {detailed_caption}\n")
+             file.write(f"More Detailed Caption: {more_detailed_caption}\n")
+         st.success(f"Captions saved to {output_path}!")
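
The change above drops the `.cuda()` calls and casts the processor output to float32 so the app can run on CPU-only hardware. For reference, here is a minimal device-agnostic sketch, assuming the same model and processor, that prefers a GPU when one is available and otherwise falls back to the CPU path used in this commit; the helper names `pick_device` and `load_florence` are illustrative and not part of app.py.

```python
# Sketch only: pick_device / load_florence are hypothetical names, not from app.py.
import torch
from transformers import AutoModelForCausalLM, AutoProcessor


def pick_device() -> torch.device:
    # Prefer a GPU when present; otherwise fall back to CPU, as this commit does.
    return torch.device("cuda" if torch.cuda.is_available() else "cpu")


def load_florence(model_id: str = "microsoft/Florence-2-large"):
    device = pick_device()
    # float16 keeps GPU memory low; CPU inference generally needs float32.
    dtype = torch.float16 if device.type == "cuda" else torch.float32
    model = AutoModelForCausalLM.from_pretrained(
        model_id, trust_remote_code=True, torch_dtype=dtype
    ).to(device).eval()
    processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
    return model, processor, device, dtype
```

With this, `run_example` could move its tensors with `processor(text=prompt, images=image, return_tensors="pt").to(device, dtype)` (the same pattern the previous version used for CUDA) and pass them to `model.generate` unchanged, so one code path would serve both GPU and CPU deployments.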