Molmo-7B-D-0924-extended-tokens

Runtime error

App Files Files Community

Molmo-7B-D-0924-extended-tokens / app.py

sflindrs

Update app.py

4bf5dae verified 2 months ago

raw

history blame

4.06 kB

	import gradio as gr
	from transformers import AutoModelForCausalLM, AutoProcessor, GenerationConfig
	from PIL import Image
	import torch
	import spaces
	import json

	# Load the processor and the model from the same checkpoint, using one
	# shared set of loading options so the two calls cannot drift apart.
	_CHECKPOINT = 'allenai/Molmo-7B-D-0924'
	_LOAD_OPTIONS = dict(
	    trust_remote_code=True,
	    torch_dtype='auto',
	    device_map='auto',
	)

	processor = AutoProcessor.from_pretrained(_CHECKPOINT, **_LOAD_OPTIONS)

	model = AutoModelForCausalLM.from_pretrained(_CHECKPOINT, **_LOAD_OPTIONS)

	import json

	def wrap_json_in_markdown(text):
	    """Wrap every JSON object or array found in *text* in a Markdown code fence.

	    The text is scanned left to right. At each ``{`` or ``[`` the standard
	    JSON decoder is asked to parse a value starting at that position. When
	    it succeeds, the parsed value is re-serialised with a 4-space indent and
	    emitted inside a fenced ``json`` block; scanning resumes right after the
	    parsed value. When it fails, the bracket is kept as ordinary text and
	    scanning continues with the next character.

	    Letting the decoder find the end of each value (instead of counting
	    brackets by hand) means brackets inside JSON strings, unclosed brackets
	    earlier in the text, and mismatched brackets no longer hide valid JSON
	    that follows them.

	    Args:
	        text: Arbitrary text that may contain embedded JSON.

	    Returns:
	        The text with each JSON object/array replaced by a fenced,
	        pretty-printed version; all other characters are kept unchanged.
	    """
	    decoder = json.JSONDecoder()
	    pieces = []
	    copied_upto = 0  # text before this index has already been emitted
	    pos = 0
	    while pos < len(text):
	        if text[pos] not in '{[':
	            pos += 1
	            continue
	        try:
	            parsed, end = decoder.raw_decode(text, pos)
	        except (json.JSONDecodeError, RecursionError):
	            # Not the start of valid JSON (or nested too deeply to parse):
	            # treat this bracket as plain text and keep searching.
	            pos += 1
	            continue
	        pieces.append(text[copied_upto:pos])
	        pieces.append(f"\n```json\n{json.dumps(parsed, indent=4)}\n```\n")
	        pos = copied_upto = end
	    pieces.append(text[copied_upto:])
	    return ''.join(pieces)

	@spaces.GPU()
	def process_image_and_text(image, text):
	    """Run the model on one image/prompt pair and return the formatted answer.

	    Args:
	        image: Image array accepted by ``PIL.Image.fromarray`` (the Gradio
	            image input in this file is configured with ``type="numpy"``).
	        text: The user's prompt about the image.

	    Returns:
	        The decoded generation, with any embedded JSON wrapped in Markdown
	        code fences by ``wrap_json_in_markdown``.
	    """
	    # Preprocess the image and the prompt together
	    inputs = processor.process(
	        images=[Image.fromarray(image)],
	        text=text
	    )

	    # Move every input to the model's device and add a leading batch
	    # dimension of size 1
	    inputs = {k: v.to(model.device).unsqueeze(0) for k, v in inputs.items()}

	    # Generate output. The stop string is the plain end-of-text marker; the
	    # earlier value had a backslash before each "|", which is an invalid
	    # escape sequence and yields a string that is not the marker.
	    output = model.generate_from_batch(
	        inputs,
	        GenerationConfig(max_new_tokens=1024, stop_strings="<|endoftext|>"),
	        tokenizer=processor.tokenizer
	    )

	    # Drop the prompt tokens so only newly generated tokens are decoded
	    generated_tokens = output[0, inputs['input_ids'].size(1):]
	    generated_text = processor.tokenizer.decode(generated_tokens, skip_special_tokens=True)

	    return wrap_json_in_markdown(generated_text)

	def chatbot(image, text, history):
	    """Handle one chat turn and return the updated message list.

	    Args:
	        image: The uploaded image, or ``None`` when nothing was uploaded.
	        text: The user's prompt.
	        history: List of ``{"role": ..., "content": ...}`` message dicts
	            from earlier turns; ``None`` is treated as an empty list.

	    Returns:
	        The message list to display. Without an image this is a new list
	        ending in a reminder from the assistant (``history`` itself is left
	        untouched); otherwise it is ``history`` extended in place with the
	        user prompt and the model's reply.
	    """
	    if history is None:
	        history = []

	    if image is None:
	        # Use the same role/content dict format as the normal path: the
	        # Chatbot component is created with type="messages", so a
	        # (user, bot) tuple entry does not fit that format.
	        reminder = {"role": "assistant", "content": "Please upload an image first."}
	        return history + [reminder]

	    response = process_image_and_text(image, text)

	    # NOTE(review): the list is mutated in place, as before. The UI wiring
	    # passes the state as an input only, so keeping earlier turns presumably
	    # relies on this mutation; confirm before changing it.
	    history.append({"role": "user", "content": text})
	    history.append({"role": "assistant", "content": response})
	    return history

	# Build the Gradio UI: a title, one row holding the image input and the
	# chat display, then the prompt box and the submit button.
	with gr.Blocks() as demo:
	    gr.Markdown("# Image Chatbot with Molmo-7B-D-0924")

	    with gr.Row():
	        image_input = gr.Image(type="numpy")
	        chatbot_output = gr.Chatbot(type="messages")

	    text_input = gr.Textbox(placeholder="Ask a question about the image...")
	    submit_button = gr.Button("Submit")

	    state = gr.State([])

	    # Clicking the button and pressing Enter in the textbox both run the
	    # same handler with the same inputs and outputs.
	    for bind_event in (submit_button.click, text_input.submit):
	        bind_event(
	            chatbot,
	            inputs=[image_input, text_input, state],
	            outputs=[chatbot_output]
	        )

	demo.launch()