import time

import accelerate
import requests
import torch
import transformers
import streamlit as st
from PIL import Image
from transformers import AutoProcessor, AutoModelForCausalLM

print(accelerate.__version__)
print(transformers.__version__)

# Image Captioning
device = "cuda" if torch.cuda.is_available() else "cpu"  # Run on the GPU if it is available
t1 = time.time()  # Start the clock before loading, so the timing print below covers model loading + inference
checkpoint = "microsoft/git-base"
processor = AutoProcessor.from_pretrained(checkpoint)  # For a language model we would load a tokenizer; GIT needs a processor that prepares images
model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)
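# The loop below calls `pipe(...)` to turn each caption back into an image, but the
# original script never defines `pipe`. A minimal sketch follows, assuming a diffusers
# text-to-image pipeline; the checkpoint name is an assumption, not from the source.
from diffusers import StableDiffusionPipeline
pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",  # assumed checkpoint, swap in your own
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
).to(device)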
# Streamlit UI
st.title("Flower Type Demo")
st.subheader("Upload an image and see how Chinese whispers works")
upload_file = st.file_uploader("Upload an Image")
if upload_file:
    # Caption the uploaded image once to seed the loop
    image = Image.open(upload_file).convert("RGB")
    inputs = processor(images=image, return_tensors="pt").to(device)
    pixel_values = inputs.pixel_values
    generated_ids = model.generate(pixel_values=pixel_values, max_length=50)
    generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    # Chinese whispers: repeatedly turn the caption into a new image, then re-caption it
    for i in range(10):
        st.write("New Caption is :")
        st.write(generated_caption)
        image = pipe(generated_caption).images[0]
        print("Model Loading + Inference time = " + str(time.time() - t1) + " seconds")
        st.write("Showing the Image")
        st.image(image, caption=generated_caption, channels="RGB", output_format="auto")
        inputs = processor(images=image, return_tensors="pt").to(device)
        pixel_values = inputs.pixel_values
        generated_ids = model.generate(pixel_values=pixel_values, max_length=50)
        generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
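# To try the demo locally (assuming this file is saved as app.py):
#   streamlit run app.py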