import os
import requests
import torch

from typing import List
from utils import encode_image
from PIL import Image
from ollama import chat  # used by the Ollama inference path below
from transformers import AutoModel, AutoTokenizer
import google.generativeai as genai



class Rag:
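    """Answer generation over retrieved page images, via Gemini or a locally served Ollama model."""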
    
    def get_answer_from_gemini(self, query, imagePaths):
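        """Send the query and the retrieved page images to Gemini and return its text answer."""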

        print(f"Querying Gemini for query={query}, imagePaths={imagePaths}")

        try:
            genai.configure(api_key=os.environ["GEMINI_API_KEY"])  # assumed env var name; the key should not be hard-coded
            model = genai.GenerativeModel('gemini-1.5-flash')
            
            images = [Image.open(path) for path in imagePaths]

            chat = model.start_chat()

            response = chat.send_message([*images, query])

            answer = response.text

            print(answer)
            
            return answer
        
        except Exception as e:
            print(f"An error occurred while querying Gemini: {e}")
            return f"Error: {str(e)}"
    
    # OPENAI_API_KEY, if needed, is expected to come from the .env file loaded below
    
    def get_answer_from_openai(self, query, imagesPaths):
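        """Answer the query over the given images using a locally served Ollama vision model.

        Despite the name, this method does not call the OpenAI API: it loads the
        .env configuration and then queries Ollama (model name taken from the
        'ollama' environment variable).
        """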
        # Load environment variables from the .env file
        import dotenv

        dotenv_file = dotenv.find_dotenv()
        dotenv.load_dotenv(dotenv_file)

        """ Local HF inference, kept for reference only: the transformers version it needs is
        incompatible with the colpali requirements, so the Ollama path below is used instead
        (more reliable, easier to use, and already web-server ready).
        print(f"Querying for query={query}, imagesPaths={imagesPaths}")

        model = AutoModel.from_pretrained(
            'openbmb/MiniCPM-o-2_6-int4',
            trust_remote_code=True,
            attn_implementation='flash_attention_2', # sdpa or flash_attention_2
            torch_dtype=torch.bfloat16,
            init_vision=True,
        )


        model = model.eval().cuda()
        tokenizer = AutoTokenizer.from_pretrained('openbmb/MiniCPM-V-2_6-int4', trust_remote_code=True)
        image = Image.open(imagesPaths[0]).convert('RGB')
        
        msgs = [{'role': 'user', 'content': [image, query]}]
        answer = model.chat(
            image=None,
            msgs=msgs,
            tokenizer=tokenizer
        )
        print(answer)
        return answer
        """

        # Ollama inference path
        torch.cuda.empty_cache()  # release GPU memory held by the colpali model so Ollama can use it

        os.environ['OLLAMA_FLASH_ATTENTION'] = os.environ['flashattn']  # "1" enables flash attention in Ollama
        if os.environ['ollama'] == "minicpm-v":
            os.environ['ollama'] = "minicpm-v:8b-2.6-q8_0"  # swap in the quantized tag of the model

        print(f"Querying Ollama for query={query}, imagesPaths={imagesPaths}")

        try:
            response = chat(
                model=os.environ['ollama'],
                messages=[
                    {
                        'role': 'user',
                        'content': query,
                        'images': imagesPaths,
                    }
                ],
                options={
                    # the sampling temperature belongs in options, not inside the message dict
                    'temperature': float(os.environ['temperature']),
                },
            )

            answer = response.message.content

            print(answer)

            return answer

        except Exception as e:
            print(f"An error occurred while querying OpenAI: {e}")
            return None
        


    def __get_openai_api_payload(self, query:str, imagesPaths:List[str]):
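        """Build an OpenAI-style chat-completions payload from the query and base64-encoded images."""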
        image_payload = []

        for imagePath in imagesPaths:
            base64_image = encode_image(imagePath)
            image_payload.append({
                "type": "image_url",
                "image_url": {
                    "url": f"data:image/jpeg;base64,{base64_image}"
                }
            })

        payload = {
            "model": "Llama3.2-vision", #change model here as needed
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": query
                        },
                        *image_payload
                    ]
                }
            ],
            "max_tokens": 1024 #reduce token size to reduce processing time
        }

        return payload
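
    # Sketch of how the payload above could be sent with the already-imported
    # `requests` module to an OpenAI-compatible chat-completions endpoint. The
    # endpoint URL, header, and response shape are assumptions (this file never
    # posts the payload itself); shown only to illustrate the intended use:
    #
    #   payload = self.__get_openai_api_payload(query, imagesPaths)
    #   response = requests.post(
    #       "http://localhost:11434/v1/chat/completions",  # assumed endpoint
    #       headers={"Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY', '')}"},
    #       json=payload,
    #   )
    #   answer = response.json()["choices"][0]["message"]["content"]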
    


# if __name__ == "__main__":
#     rag = Rag()
    
#     query = "Based on attached images, how many new cases were reported during second wave peak"
#     imagesPaths = ["covid_slides_page_8.png", "covid_slides_page_8.png"]
    
#     rag.get_answer_from_gemini(query, imagesPaths)