# NOTE(review): removed pasted web-viewer residue (file-size line, blame hashes,
# line-number gutter) — it was not part of the source and is not valid Python.
import requests
import os
from typing import List
from utils import encode_image
from PIL import Image
import torch
import subprocess
import psutil
import torch
from transformers import AutoModel, AutoTokenizer
import google.generativeai as genai
class Rag:
    """Retrieval-augmented generation helpers.

    Sends a text query plus one or more page images to a multimodal LLM —
    either Gemini (via ``google.generativeai``) or a local model served by
    Ollama — and returns the model's text answer.
    """

    def get_answer_from_gemini(self, query, imagePaths):
        """Ask Gemini 1.5 Flash to answer ``query`` grounded on the images.

        Args:
            query: Natural-language question.
            imagePaths: Paths of image files opened with PIL and attached.

        Returns:
            The model's text answer, or an ``"Error: ..."`` string on failure.
        """
        print(f"Querying Gemini for query={query}, imagePaths={imagePaths}")
        try:
            # Security fix: an API key used to be hard-coded here and is now
            # leaked in version control (it should be revoked). Read the key
            # from the environment instead; never commit credentials.
            api_key = os.environ.get("GEMINI_API_KEY")
            if not api_key:
                raise RuntimeError("GEMINI_API_KEY environment variable is not set")
            genai.configure(api_key=api_key)
            model = genai.GenerativeModel('gemini-1.5-flash')
            images = [Image.open(path) for path in imagePaths]
            chat_session = model.start_chat()
            response = chat_session.send_message([*images, query])
            answer = response.text
            print(answer)
            return answer
        except Exception as e:
            print(f"An error occurred while querying Gemini: {e}")
            return f"Error: {str(e)}"

    def get_answer_from_openai(self, query, imagesPaths):
        """Answer ``query`` with a local multimodal model served by Ollama.

        Configuration is read from the environment (populated from ``.env``):
          - ``flashattn``:   forwarded to ``OLLAMA_FLASH_ATTENTION`` ("1" enables)
          - ``ollama``:      model tag to run
          - ``temperature``: sampling temperature (float)

        Args:
            query: Natural-language question.
            imagesPaths: Paths of image files passed to the model.

        Returns:
            The answer text, or ``None`` on failure.
        """
        # Load .env so the os.environ lookups below are populated.
        import dotenv
        dotenv.load_dotenv(dotenv.find_dotenv())

        torch.cuda.empty_cache()  # release CUDA memory so Ollama can use the GPU
        os.environ['OLLAMA_FLASH_ATTENTION'] = os.environ['flashattn']  # "1" enables flash attention
        if os.environ['ollama'] == "minicpm-v":
            # Pin the bare tag to a specific quantized build.
            os.environ['ollama'] = "minicpm-v:8b-2.6-q8_0"

        print(f"Querying OpenAI for query={query}, imagesPaths={imagesPaths}")
        try:
            # NOTE(review): `chat` is expected to come from the `ollama`
            # package (`from ollama import chat`) — it is not imported at the
            # top of this file; confirm and add the import there.
            response = chat(
                model=os.environ['ollama'],
                messages=[
                    {
                        'role': 'user',
                        'content': query,
                        'images': imagesPaths,
                    }
                ],
                # Bug fix: sampling parameters belong in `options`; inside the
                # message dict Ollama silently ignored the temperature.
                options={'temperature': float(os.environ['temperature'])},
            )
            answer = response.message.content
            print(answer)
            return answer
        except Exception as e:
            print(f"An error occurred while querying OpenAI: {e}")
            return None

    def __get_openai_api_payload(self, query: str, imagesPaths: List[str]) -> dict:
        """Build an OpenAI-style chat-completions payload: the query text plus
        each image inlined as a base64 ``data:`` URL.

        Args:
            query: Natural-language question.
            imagesPaths: Paths of image files, each base64-encoded via
                ``utils.encode_image``.

        Returns:
            A JSON-serializable request payload dict.
        """
        image_payload = [
            {
                "type": "image_url",
                "image_url": {"url": f"data:image/jpeg;base64,{encode_image(path)}"},
            }
            for path in imagesPaths
        ]
        payload = {
            "model": "Llama3.2-vision",  # change model here as needed
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": query},
                        *image_payload,
                    ],
                }
            ],
            "max_tokens": 1024,  # cap output size to bound processing time
        }
        return payload
# if __name__ == "__main__":
# rag = Rag()
# query = "Based on attached images, how many new cases were reported during second wave peak"
# imagesPaths = ["covid_slides_page_8.png", "covid_slides_page_8.png"]
# rag.get_answer_from_gemini(query, imagesPaths)