# Vision pipeline: load an image from disk, send it to GPT-4o alongside a
# prompt and output-parser format instructions, and parse the structured reply.
from langchain.chains import TransformChain
from langchain_core.messages import HumanMessage
from langchain_openai import ChatOpenAI
from langchain import globals
from langchain_core.runnables import chain
import base64
from typing import Dict,List,Union
def load_image(inputs: dict) -> dict:
    """Read the file at ``inputs["image_path"]`` and base64-encode its bytes.

    Args:
        inputs: Mapping with an ``"image_path"`` key pointing at a file on disk.

    Returns:
        ``{"image": <base64 string>}`` — the raw file bytes encoded as a
        UTF-8 base64 string, ready for a data-URL.
    """
    path = inputs["image_path"]
    with open(path, "rb") as fh:
        raw = fh.read()
    return {"image": base64.b64encode(raw).decode("utf-8")}
# Wrap the loader in a TransformChain so it composes with `|` in LCEL pipelines:
# consumes "image_path", produces "image" (base64 string).
load_image_chain = TransformChain(
    transform=load_image,
    input_variables=["image_path"],
    output_variables=["image"],
)
@chain
def image_model(inputs: dict) -> Union[str, List[str], dict]:
    """Send the prompt, the parser's format instructions and the image to GPT-4o.

    Args:
        inputs: Mapping with ``"prompt"`` (instruction text), ``"parser"``
            (LangChain output parser supplying format instructions) and
            ``"image"`` (base64-encoded image, embedded as a JPEG data-URL —
            assumes the source file was JPEG; TODO confirm for other formats).

    Returns:
        The model reply's raw ``content`` (string, list, or dict).
    """
    llm = ChatOpenAI(temperature=0.1, model="gpt-4o", max_tokens=1024)
    output_parser = inputs["parser"]
    message_content = [
        {"type": "text", "text": inputs["prompt"]},
        {"type": "text", "text": output_parser.get_format_instructions()},
        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{inputs['image']}"}},
    ]
    response = llm.invoke([HumanMessage(content=message_content)])
    return response.content
def get_image_informations(image_path: str, prompt, parser) -> dict:
    """Run the full vision pipeline: load image -> query GPT-4o -> parse reply.

    Args:
        image_path: Path to the image file on disk.
        prompt: Instruction text sent alongside the image.
        parser: LangChain output parser; provides format instructions to the
            model and parses its raw reply into structured data.

    Returns:
        The parser's structured result (a dict for typical Pydantic/JSON parsers).
    """
    vision_chain = load_image_chain | image_model | parser
    # `image_path` is already a str — no need to wrap it in an f-string.
    return vision_chain.invoke({
        "image_path": image_path,
        "prompt": prompt,
        "parser": parser,
    })