File size: 971 Bytes
660daa9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
"""VLM Helper Functions."""
import base64
import numpy as np
from openai import OpenAI


class GPT4V:
  """Thin wrapper that sends text/image prompts to an OpenAI chat model.

  Attributes:
    client: The `OpenAI` client used to issue requests.
    model: Name of the chat model that `query` sends requests to.
  """

  # Default model name. Kept at class level so it can be overridden per
  # instance (via `__init__`) or per subclass without touching `query`.
  model = 'gpt-4-vision-preview'

  def __init__(self, openai_api_key, model=None):
    """Creates the API client.

    Args:
      openai_api_key: API key handed to the `OpenAI` client constructor.
      model: Optional model name. When None, the class-level default is used.
    """
    self.client = OpenAI(api_key=openai_api_key)
    if model is not None:
      self.model = model

  def query(self, prompt_seq, temperature=0, max_tokens=512):
    """Queries the model with an interleaved sequence of text and images.

    Args:
      prompt_seq: Iterable of prompt parts. `str` parts are sent as text.
        `np.ndarray`, `bytes` and `bytearray` parts are sent as images: their
        raw buffer is base64-encoded as-is and labelled `image/jpeg`. No image
        encoding is performed here, so the buffer presumably has to hold
        already JPEG-encoded data (TODO confirm against callers). Parts of any
        other type are skipped.
      temperature: Sampling temperature forwarded to the API.
      max_tokens: Completion token limit forwarded to the API.

    Returns:
      The message content of the first choice in the API response.
    """
    content = []
    for elem in prompt_seq:
      if isinstance(elem, str):
        content.append({'type': 'text', 'text': elem})
      elif isinstance(elem, (np.ndarray, bytes, bytearray)):
        if isinstance(elem, np.ndarray):
          # b64encode needs a C-contiguous buffer; strided or transposed views
          # are not, so copy them into contiguous memory first. Arrays that
          # are already contiguous are passed through unchanged.
          elem = np.ascontiguousarray(elem)
        base64_image_str = base64.b64encode(elem).decode('utf-8')
        image_url = f'data:image/jpeg;base64,{base64_image_str}'
        content.append({'type': 'image_url', 'image_url': {'url': image_url}})
      # Any other element type is intentionally left out of the request,
      # matching the behavior existing callers rely on.

    # The whole prompt is sent as a single user turn.
    messages = [{'role': 'user', 'content': content}]

    response = self.client.chat.completions.create(
        model=self.model,
        messages=messages,
        temperature=temperature,
        max_tokens=max_tokens
    )

    return response.choices[0].message.content