from clip_component import get_token_from_clip
from grounding_component import run_grounding


def detect(image):
    """Run the CLIP step and then the grounding step on ``image``.

    The token returned by ``get_token_from_clip(image)`` is printed to
    stdout (preceded by the literal label ``token``) and then handed,
    together with the original image, to ``run_grounding``.

    Args:
        image: The input forwarded unchanged to both components.
            NOTE(review): the expected type (path, PIL image, array, ...)
            is determined by the two components -- confirm against them.

    Returns:
        Whatever ``run_grounding(image, token)`` returns.
    """
    clip_token = get_token_from_clip(image)

    # Debug trace of the token chosen by the CLIP component.
    print('token')
    print(clip_token)

    return run_grounding(image, clip_token)