|
import gradio as gr |
|
from PIL import Image |
|
from transformers import pipeline, AutoModelForVision2Seq, AutoProcessor |
|
import torch |
|
|
|
|
|
# Vision-language model used to read the recipe text out of an uploaded image.
# Fix: InternVL-Chat-V1-5 ships custom modeling code on the Hub, so loading it
# through the Auto* classes requires trust_remote_code=True.
processor = AutoProcessor.from_pretrained(
    "OpenGVLab/InternVL-Chat-V1-5", trust_remote_code=True
)

model = AutoModelForVision2Seq.from_pretrained(
    "OpenGVLab/InternVL-Chat-V1-5", trust_remote_code=True
)


# Text LLM used to rewrite (double/halve) the extracted recipe.
# Fix: "llama3" is not a valid Hugging Face model id, and Llama models are
# causal LMs served by the "text-generation" task, not "text2text-generation".
# The pipeline output key ('generated_text') is the same for both tasks, so
# downstream code (adjust_recipe) is unaffected.
llama_model = pipeline(
    "text-generation", model="meta-llama/Meta-Llama-3-8B-Instruct"
)
|
|
|
def process_image(image):
    """Extract the text content of *image* using the vision-language model.

    The module-level ``processor``/``model`` pair turns the PIL image into
    tensors, generates token ids, and decodes them back into a plain string.
    Returns the decoded text for the first (only) item in the batch.
    """
    batch = processor(images=image, return_tensors="pt")

    token_ids = model.generate(**batch)

    decoded = processor.batch_decode(token_ids, skip_special_tokens=True)

    return decoded[0]
|
|
|
def adjust_recipe(extracted_text, adjustment):
    """Ask the language model to scale the recipe in *extracted_text*.

    ``adjustment`` is interpolated verbatim into the prompt (the UI supplies
    "double" or "halve"); the model's generated text is returned unchanged.
    """
    prompt = f"Here is a recipe: {extracted_text}. Please {adjustment} the recipe."

    outputs = llama_model(prompt)

    first = outputs[0]

    return first['generated_text']
|
|
|
def app(image, adjustment):
    """Gradio entry point: read the recipe out of *image*, then scale it.

    Chains the two pipeline stages — OCR/captioning via ``process_image``,
    then LLM rewriting via ``adjust_recipe`` — and returns the final text.
    """
    return adjust_recipe(process_image(image), adjustment)
|
|
|
|
|
# Fix: the gr.inputs.* namespace is Gradio 2.x and was removed in Gradio 3/4;
# components now live at the top level (gr.Image, gr.Dropdown).
interface = gr.Interface(
    fn=app,
    inputs=[
        gr.Image(type="pil"),
        gr.Dropdown(choices=["double", "halve"]),
    ],
    outputs="text",
    title="Recipe Adjuster",
    description="Upload an image of a recipe, and this app will double or halve the recipe.",
)
|
|
|
# Start the Gradio web server only when run as a script, not on import.
if __name__ == "__main__":

    interface.launch()
|
|