File size: 5,336 Bytes
065b6ad d9fa664 065b6ad d9fa664 065b6ad d9fa664 065b6ad d9fa664 065b6ad d9fa664 065b6ad d9fa664 065b6ad d9fa664 065b6ad d9fa664 065b6ad d9fa664 065b6ad |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 |
from colpali_engine.models import ColPali
from colpali_engine.models.paligemma.colpali.processing_colpali import ColPaliProcessor
from colpali_engine.utils.processing_utils import BaseVisualRetrieverProcessor
from colpali_engine.utils.torch_utils import ListDataset, get_torch_device
from torch.utils.data import DataLoader
import torch
from typing import List, cast
#from colpali_engine.models import ColQwen2_5, ColQwen2_5_Processor
from colpali_engine.models import ColIdefics3, ColIdefics3Processor
from tqdm import tqdm
from PIL import Image
import os
import spaces
# This part is for local runs: hand cached-but-unused CUDA memory back before
# the model is loaded.
torch.cuda.empty_cache()

# The checkpoint name is read from the `colpali` entry of the .env file and is
# also used to name the local model / processor directories.
import dotenv

# Load the .env file
dotenv_file = dotenv.find_dotenv()
dotenv.load_dotenv(dotenv_file)
model_name = os.environ['colpali']  # e.g. "vidore/colSmol-256M"
device = get_torch_device("cuda")  # try using cpu instead of cuda?

# Models are downloaded once, saved under the current working directory and
# loaded from there on later runs instead of from the Hugging Face hub.
current_working_directory = os.getcwd()
# Directory holding the saved weights of this specific model.
save_directory = os.path.join(current_working_directory, model_name)
# Directory holding the saved processor of this specific model.
processor_directory = os.path.join(current_working_directory, model_name + '_processor')

# Pick the model / processor classes once instead of repeating the check in
# both the download and the cached branch.
if "colSmol" in model_name:
    _model_cls, _processor_cls = ColIdefics3, ColIdefics3Processor
else:  # ColPali v1.3 etc.
    _model_cls, _processor_cls = ColPali, ColPaliProcessor

# Both directories must be present for the local copy to be usable; if an
# earlier run was interrupted between the two saves, download again rather
# than failing on the missing processor directory.
_local_copy_ready = os.path.isdir(save_directory) and os.path.isdir(processor_directory)

if not _local_copy_ready:
    model = _model_cls.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,
        device_map=device,
    ).eval()
    processor = _processor_cls.from_pretrained(model_name)

    # exist_ok=True: a directory left behind by a partial run must not abort
    # the re-download with FileExistsError.
    os.makedirs(save_directory, exist_ok=True)
    print(f"Directory '{save_directory}' created.")
    model.save_pretrained(save_directory)
    os.makedirs(processor_directory, exist_ok=True)
    processor.save_pretrained(processor_directory)
else:
    # Reload with the same dtype and eval mode as the first-run download so
    # embeddings are produced at the same precision on every launch. Device
    # placement is left to the code that runs inference (it calls
    # model.to("cuda") itself).
    model = _model_cls.from_pretrained(
        save_directory,
        torch_dtype=torch.bfloat16,
    ).eval()
    processor = _processor_cls.from_pretrained(processor_directory, use_fast=True)
class ColpaliManager:
    """Produce embeddings for page images and text queries.

    The model and processor are the module-level ``model`` / ``processor``
    objects, shared by every instance; this class only moves the model between
    devices and runs batched inference.
    """

    def __init__(self, device="cuda", model_name=model_name):  # need to hot potato/use diff gpus between colpali & ollama
        """Log the requested configuration.

        Args:
            device: Only reported in the log line; the shared model is not
                reloaded or moved here.
            model_name: Only reported in the log line; defaults to the
                module-level ``model_name``.
        """
        print(f"Initializing ColpaliManager with device {device} and model {model_name}")

    # NOTE(review): opening images needs no GPU; the decorator is retained so
    # scheduling on Spaces is unchanged - confirm whether it can be dropped.
    @spaces.GPU
    def get_images(self, paths: list[str]) -> List[Image.Image]:
        """Open and fully decode the images at *paths*, preserving order.

        ``Image.open`` is lazy and keeps the file open until the pixel data is
        read, so each image is loaded inside a ``with`` block; the file is
        closed on exit while the decoded image stays usable.

        Args:
            paths: File paths of the images to read.

        Returns:
            One loaded ``PIL.Image.Image`` per path.
        """
        images: List[Image.Image] = []
        for path in paths:
            with Image.open(path) as image:
                image.load()
            images.append(image)
        return images

    def _embed(self, items, collate_fn, batch_size, show_progress=False):
        """Run the shared model over *items* in batches; return one array per item."""
        dataloader = DataLoader(
            dataset=ListDataset(items),
            batch_size=batch_size,
            shuffle=False,
            collate_fn=collate_fn,
        )
        batches = tqdm(dataloader) if show_progress else dataloader

        embeddings_np = []
        with torch.no_grad():
            for batch in batches:
                batch = {k: v.to(model.device) for k, v in batch.items()}
                batch_embeddings = model(**batch)
                # Convert every row to a float32 CPU array right away so the
                # results of earlier batches do not pile up in GPU memory.
                embeddings_np.extend(
                    row.float().cpu().numpy()
                    for row in torch.unbind(batch_embeddings)
                )
        return embeddings_np

    @spaces.GPU
    def process_images(self, image_paths: list[str], batch_size=5):
        """Embed the images stored at *image_paths*.

        The shared model is moved to CUDA and left there afterwards.

        Args:
            image_paths: File paths of the images to embed.
            batch_size: Number of images per forward pass.

        Returns:
            A list with one float32 NumPy array per image, in input order.
        """
        model.to("cuda")
        print(f"Processing {len(image_paths)} image_paths")
        images = self.get_images(image_paths)
        return self._embed(
            images,
            processor.process_images,
            batch_size,
            show_progress=True,
        )

    @spaces.GPU
    def process_text(self, texts: list[str], batch_size=5):
        """Embed the query strings in *texts*.

        The shared model is moved to CUDA for inference and back to the CPU
        afterwards so the GPU is free for the following ollama call.

        Args:
            texts: Query strings to embed.
            batch_size: Number of queries per forward pass.

        Returns:
            A list with one float32 NumPy array per query, in input order.
        """
        model.to("cuda")  # must stay enabled so that multiple queries can be processed on the GPU
        print(f"Processing {len(texts)} texts")
        try:
            return self._embed(texts, processor.process_queries, batch_size)
        finally:
            # Moves all model parameters and buffers to the CPU, freeing up the
            # GPU for the ollama call after this one - also when inference
            # raised.
            model.to("cpu")
|