Spaces:
Runtime error
Runtime error
File size: 4,041 Bytes
c04f965 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
import logging
import torch
import os
from PIL import Image
from io import BytesIO
from transformers import AutoProcessor, Idefics2ForConditionalGeneration
from huggingface_hub import hf_hub_download
from indexify_extractor_sdk import Content, Extractor, Feature
from .parse_utils import token2json
from pydantic import BaseModel
from pydantic_settings import BaseSettings
from typing import Optional, Literal, List, Union
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Hugging Face access token taken from the HF_TOKEN environment variable;
# None when the variable is not set.
token = os.environ.get("HF_TOKEN")
class ModelSettings(BaseSettings):
    """Configuration for the extractor's model checkpoint.

    NOTE(review): as a pydantic-settings ``BaseSettings`` subclass, the field
    values below are presumably overridable through environment variables --
    confirm against the pydantic-settings version in use.
    """

    # Hub repository id handed to ``from_pretrained`` for both the processor
    # and the model.
    peft_model_id: str = "nielsr/idefics2-cord-demo"
    # Optional Hugging Face token; defaults to the module-level ``token``.
    hf_token: Union[str, None] = token


# Settings instance created once at import time and read by the extractor.
model_settings = ModelSettings()
class ReceiptExtractor(Extractor):
    """Indexify extractor that turns a receipt image into parsed structured text.

    A fine-tuned Idefics2 checkpoint (``model_settings.peft_model_id``) is
    loaded once in ``__init__`` together with resized input/output embedding
    matrices downloaded from the Hub. ``extract`` then prompts the model with
    the image and converts the generated token sequence with ``token2json``.
    """

    name = "tensorlake/idefics2json"
    description = "Finetuned Idefics2 for Image to JSON."
    system_dependencies = []
    input_mime_types = ["image/jpeg", "image/png"]

    def __init__(self):
        """Load the processor and model, and install the resized embeddings.

        The model runs on CUDA in float16 when a GPU is available and on the
        CPU in float32 otherwise.
        """
        super().__init__()

        # Remember the chosen device so that both the model and every batch of
        # inputs end up in the same place (hard-coding "cuda" crashes on
        # CPU-only hosts).
        self._device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        logger.info("Using device: %s", self._device.type)
        torch_dtype = torch.float32 if self._device.type == "cpu" else torch.float16

        self.processor = AutoProcessor.from_pretrained(model_settings.peft_model_id)

        # Load the base model with adapters on top
        self.model = Idefics2ForConditionalGeneration.from_pretrained(
            model_settings.peft_model_id,
            torch_dtype=torch_dtype,
        )

        # Resized input embeddings: an Embedding layer wrapping the stored matrix.
        input_weight = self._load_embedding_weight("input_embeddings.pt")
        input_embeddings_module = torch.nn.Embedding(
            input_weight.shape[0],
            input_weight.shape[1],
            _weight=input_weight,
        )

        # Resized output embeddings. nn.Linear keeps its weight as
        # (out_features, in_features), so the stored matrix's first dimension
        # is out_features and its second is in_features.
        output_weight = self._load_embedding_weight("output_embeddings.pt")
        out_features, in_features = output_weight.shape
        output_embeddings_module = torch.nn.Linear(in_features, out_features, bias=False)
        # Module attributes registered as parameters only accept nn.Parameter,
        # so wrap a plain tensor before assigning it.
        if not isinstance(output_weight, torch.nn.Parameter):
            output_weight = torch.nn.Parameter(output_weight)
        output_embeddings_module.weight = output_weight

        # set them accordingly
        self.model.resize_token_embeddings(len(self.processor.tokenizer))
        self.model.set_input_embeddings(input_embeddings_module)
        self.model.set_output_embeddings(output_embeddings_module)

        # Move everything to the selected device and cast so the swapped-in
        # embedding layers use the same dtype the model was loaded with.
        self.model.to(device=self._device, dtype=torch_dtype)

    @staticmethod
    def _load_embedding_weight(filename, repo_id="nielsr/idefics2-embeddings"):
        """Download ``filename`` from the embeddings dataset repo and load it on CPU."""
        filepath = hf_hub_download(repo_id=repo_id, filename=filename, repo_type="dataset")
        # NOTE(review): torch.load unpickles the downloaded file; consider
        # passing weights_only=True once confirmed the file holds only tensors.
        return torch.load(filepath, map_location="cpu")

    def extract(self, content: Content, params = None) -> List[Union[Feature, Content]]:
        """Run the model on one image and return the parsed result as text.

        Args:
            content: Content whose ``data`` holds the encoded image bytes.
            params: Unused; accepted for interface compatibility.

        Returns:
            A single-element list holding a text ``Content`` built from
            ``str()`` of the value returned by ``token2json``.
        """
        image = Image.open(BytesIO(content.data))

        # prepare image and prompt for the model
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Extract JSON."},
                    {"type": "image"},
                ],
            },
        ]
        prompt = self.processor.apply_chat_template(messages, add_generation_prompt=True)
        inputs = self.processor(text=prompt, images=[image], return_tensors="pt")
        # Inputs must live on the same device as the model.
        inputs = inputs.to(self._device)

        # Generate token IDs
        generated_ids = self.model.generate(**inputs, max_new_tokens=768)

        # Decode back into text
        generated_texts = self.processor.batch_decode(generated_ids, skip_special_tokens=True)
        added_vocab = self.processor.tokenizer.get_added_vocab()
        generated_json = token2json(generated_texts[0], added_vocab)

        # NOTE(review): str() yields a Python repr rather than strict JSON;
        # kept as-is because downstream consumers may depend on this format.
        return [Content.from_text(str(generated_json))]

    def sample_input(self) -> Content:
        """Return the bundled ``sample.jpg`` as JPEG content for smoke testing."""
        filepath = "sample.jpg"
        with open(filepath, 'rb') as f:
            image_data = f.read()
        # "image/jpeg" is the registered MIME type and matches input_mime_types.
        return Content(content_type="image/jpeg", data=image_data)
if __name__ == "__main__":
    # Manual smoke test: run the extractor once on the local sample image and
    # print whatever it returns.
    filepath = "sample.jpg"
    with open(filepath, 'rb') as f:
        image_data = f.read()
    # "image/jpeg" is the registered MIME type and is one of the types the
    # extractor declares in input_mime_types.
    data = Content(content_type="image/jpeg", data=image_data)
    extractor = ReceiptExtractor()
    results = extractor.extract(data)
    print(results)