ga89tiy
commited on
Commit
•
6edd88e
1
Parent(s):
1db0e44
cleanup
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- LLAVA_Biovil/llava/eval/__init__.py +0 -0
- LLAVA_Biovil/llava/eval/eval_gpt_review.py +0 -113
- LLAVA_Biovil/llava/eval/eval_gpt_review_bench.py +0 -121
- LLAVA_Biovil/llava/eval/eval_gpt_review_visual.py +0 -118
- LLAVA_Biovil/llava/eval/eval_pope.py +0 -81
- LLAVA_Biovil/llava/eval/eval_science_qa.py +0 -114
- LLAVA_Biovil/llava/eval/eval_science_qa_gpt4.py +0 -104
- LLAVA_Biovil/llava/eval/eval_science_qa_gpt4_requery.py +0 -149
- LLAVA_Biovil/llava/eval/eval_textvqa.py +0 -65
- LLAVA_Biovil/llava/eval/generate_webpage_data_from_table.py +0 -111
- LLAVA_Biovil/llava/eval/m4c_evaluator.py +0 -334
- LLAVA_Biovil/llava/eval/model_qa.py +0 -85
- LLAVA_Biovil/llava/eval/model_vqa.py +0 -112
- LLAVA_Biovil/llava/eval/model_vqa_loader.py +0 -141
- LLAVA_Biovil/llava/eval/model_vqa_mmbench.py +0 -169
- LLAVA_Biovil/llava/eval/model_vqa_qbench.py +0 -120
- LLAVA_Biovil/llava/eval/model_vqa_science.py +0 -147
- LLAVA_Biovil/llava/eval/qa_baseline_gpt35.py +0 -74
- LLAVA_Biovil/llava/eval/run_llava.py +0 -155
- LLAVA_Biovil/llava/eval/summarize_gpt_review.py +0 -60
- LLAVA_Biovil/llava/eval/webpage/figures/alpaca.png +0 -0
- LLAVA_Biovil/llava/eval/webpage/figures/bard.jpg +0 -0
- LLAVA_Biovil/llava/eval/webpage/figures/chatgpt.svg +0 -1
- LLAVA_Biovil/llava/eval/webpage/figures/llama.jpg +0 -0
- LLAVA_Biovil/llava/eval/webpage/figures/swords_FILL0_wght300_GRAD0_opsz48.svg +0 -1
- LLAVA_Biovil/llava/eval/webpage/figures/vicuna.jpeg +0 -0
- LLAVA_Biovil/llava/eval/webpage/index.html +0 -162
- LLAVA_Biovil/llava/eval/webpage/script.js +0 -245
- LLAVA_Biovil/llava/eval/webpage/styles.css +0 -105
- LLAVA_Biovil/llava/mm_utils.py +1 -1
- LLAVA_Biovil/llava/model/apply_delta.py +1 -1
- LLAVA_Biovil/llava/model/builder.py +8 -8
- LLAVA_Biovil/llava/model/consolidate.py +1 -1
- LLAVA_Biovil/llava/model/language_model/llava_llama.py +1 -1
- LLAVA_Biovil/llava/model/language_model/llava_mpt.py +2 -2
- LLAVA_Biovil/llava/model/llava_arch.py +6 -6
- LLAVA_Biovil/llava/serve/__init__.py +0 -0
- LLAVA_Biovil/llava/serve/cli.py +0 -122
- LLAVA_Biovil/llava/serve/controller.py +0 -296
- LLAVA_Biovil/llava/serve/examples/extreme_ironing.jpg +0 -0
- LLAVA_Biovil/llava/serve/examples/waterview.jpg +0 -0
- LLAVA_Biovil/llava/serve/gradio_web_server.py +0 -470
- LLAVA_Biovil/llava/serve/model_worker.py +0 -310
- LLAVA_Biovil/llava/serve/register_worker.py +0 -26
- LLAVA_Biovil/llava/serve/test_message.py +0 -62
- LLAVA_Biovil/llava/train/__init__.py +0 -0
- LLAVA_Biovil/llava/train/llama_flash_attn_monkey_patch.py +0 -115
- LLAVA_Biovil/llava/train/llama_patch.py +0 -139
- LLAVA_Biovil/llava/train/llama_xformers_attn_monkey_patch.py +0 -129
- LLAVA_Biovil/llava/train/llava_trainer.py +0 -801
LLAVA_Biovil/llava/eval/__init__.py
DELETED
File without changes
|
LLAVA_Biovil/llava/eval/eval_gpt_review.py
DELETED
@@ -1,113 +0,0 @@
|
|
1 |
-
import argparse
|
2 |
-
import json
|
3 |
-
import os
|
4 |
-
|
5 |
-
import openai
|
6 |
-
import tqdm
|
7 |
-
import ray
|
8 |
-
import time
|
9 |
-
|
10 |
-
NUM_SECONDS_TO_SLEEP = 3
|
11 |
-
|
12 |
-
@ray.remote(num_cpus=4)
|
13 |
-
def get_eval(content: str, max_tokens: int):
|
14 |
-
while True:
|
15 |
-
try:
|
16 |
-
response = openai.ChatCompletion.create(
|
17 |
-
model='gpt-4',
|
18 |
-
messages=[{
|
19 |
-
'role': 'system',
|
20 |
-
'content': 'You are a helpful and precise assistant for checking the quality of the answer.'
|
21 |
-
}, {
|
22 |
-
'role': 'user',
|
23 |
-
'content': content,
|
24 |
-
}],
|
25 |
-
temperature=0.2, # TODO: figure out which temperature is best for evaluation
|
26 |
-
max_tokens=max_tokens,
|
27 |
-
)
|
28 |
-
break
|
29 |
-
except openai.error.RateLimitError:
|
30 |
-
pass
|
31 |
-
except Exception as e:
|
32 |
-
print(e)
|
33 |
-
time.sleep(NUM_SECONDS_TO_SLEEP)
|
34 |
-
|
35 |
-
print('success!')
|
36 |
-
return response['choices'][0]['message']['content']
|
37 |
-
|
38 |
-
|
39 |
-
def parse_score(review):
|
40 |
-
try:
|
41 |
-
score_pair = review.split('\n')[0]
|
42 |
-
score_pair = score_pair.replace(',', ' ')
|
43 |
-
sp = score_pair.split(' ')
|
44 |
-
if len(sp) == 2:
|
45 |
-
return [float(sp[0]), float(sp[1])]
|
46 |
-
else:
|
47 |
-
print('error', review)
|
48 |
-
return [-1, -1]
|
49 |
-
except Exception as e:
|
50 |
-
print(e)
|
51 |
-
print('error', review)
|
52 |
-
return [-1, -1]
|
53 |
-
|
54 |
-
|
55 |
-
if __name__ == '__main__':
|
56 |
-
parser = argparse.ArgumentParser(description='ChatGPT-based QA evaluation.')
|
57 |
-
parser.add_argument('-q', '--question')
|
58 |
-
# parser.add_argument('-a', '--answer')
|
59 |
-
parser.add_argument('-a', '--answer-list', nargs='+', default=[])
|
60 |
-
parser.add_argument('-r', '--rule')
|
61 |
-
parser.add_argument('-o', '--output')
|
62 |
-
parser.add_argument('--max-tokens', type=int, default=1024, help='maximum number of tokens produced in the output')
|
63 |
-
args = parser.parse_args()
|
64 |
-
|
65 |
-
ray.init()
|
66 |
-
|
67 |
-
f_q = open(os.path.expanduser(args.question))
|
68 |
-
f_ans1 = open(os.path.expanduser(args.answer_list[0]))
|
69 |
-
f_ans2 = open(os.path.expanduser(args.answer_list[1]))
|
70 |
-
rule_dict = json.load(open(os.path.expanduser(args.rule), 'r'))
|
71 |
-
|
72 |
-
review_file = open(f'{args.output}', 'w')
|
73 |
-
|
74 |
-
js_list = []
|
75 |
-
handles = []
|
76 |
-
idx = 0
|
77 |
-
for ques_js, ans1_js, ans2_js in zip(f_q, f_ans1, f_ans2):
|
78 |
-
# if idx == 1:
|
79 |
-
# break
|
80 |
-
|
81 |
-
ques = json.loads(ques_js)
|
82 |
-
ans1 = json.loads(ans1_js)
|
83 |
-
ans2 = json.loads(ans2_js)
|
84 |
-
|
85 |
-
category = json.loads(ques_js)['category']
|
86 |
-
if category in rule_dict:
|
87 |
-
rule = rule_dict[category]
|
88 |
-
else:
|
89 |
-
rule = rule_dict['default']
|
90 |
-
prompt = rule['prompt']
|
91 |
-
role = rule['role']
|
92 |
-
content = (f'[Question]\n{ques["text"]}\n\n'
|
93 |
-
f'[{role} 1]\n{ans1["text"]}\n\n[End of {role} 1]\n\n'
|
94 |
-
f'[{role} 2]\n{ans2["text"]}\n\n[End of {role} 2]\n\n'
|
95 |
-
f'[System]\n{prompt}\n\n')
|
96 |
-
js_list.append({
|
97 |
-
'id': idx+1,
|
98 |
-
'question_id': ques['question_id'],
|
99 |
-
'answer1_id': ans1['answer_id'],
|
100 |
-
'answer2_id': ans2['answer_id'],
|
101 |
-
'category': category})
|
102 |
-
idx += 1
|
103 |
-
handles.append(get_eval.remote(content, args.max_tokens))
|
104 |
-
# To avoid the rate limit set by OpenAI
|
105 |
-
time.sleep(NUM_SECONDS_TO_SLEEP)
|
106 |
-
|
107 |
-
reviews = ray.get(handles)
|
108 |
-
for idx, review in enumerate(reviews):
|
109 |
-
scores = parse_score(review)
|
110 |
-
js_list[idx]['content'] = review
|
111 |
-
js_list[idx]['tuple'] = scores
|
112 |
-
review_file.write(json.dumps(js_list[idx]) + '\n')
|
113 |
-
review_file.close()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
LLAVA_Biovil/llava/eval/eval_gpt_review_bench.py
DELETED
@@ -1,121 +0,0 @@
|
|
1 |
-
import argparse
|
2 |
-
import json
|
3 |
-
import os
|
4 |
-
|
5 |
-
import openai
|
6 |
-
import time
|
7 |
-
|
8 |
-
NUM_SECONDS_TO_SLEEP = 0.5
|
9 |
-
|
10 |
-
|
11 |
-
def get_eval(content: str, max_tokens: int):
|
12 |
-
while True:
|
13 |
-
try:
|
14 |
-
response = openai.ChatCompletion.create(
|
15 |
-
model='gpt-4-0314',
|
16 |
-
messages=[{
|
17 |
-
'role': 'system',
|
18 |
-
'content': 'You are a helpful and precise assistant for checking the quality of the answer.'
|
19 |
-
}, {
|
20 |
-
'role': 'user',
|
21 |
-
'content': content,
|
22 |
-
}],
|
23 |
-
temperature=0.2, # TODO: figure out which temperature is best for evaluation
|
24 |
-
max_tokens=max_tokens,
|
25 |
-
)
|
26 |
-
break
|
27 |
-
except openai.error.RateLimitError:
|
28 |
-
pass
|
29 |
-
except Exception as e:
|
30 |
-
print(e)
|
31 |
-
time.sleep(NUM_SECONDS_TO_SLEEP)
|
32 |
-
|
33 |
-
return response['choices'][0]['message']['content']
|
34 |
-
|
35 |
-
|
36 |
-
def parse_score(review):
|
37 |
-
try:
|
38 |
-
score_pair = review.split('\n')[0]
|
39 |
-
score_pair = score_pair.replace(',', ' ')
|
40 |
-
sp = score_pair.split(' ')
|
41 |
-
if len(sp) == 2:
|
42 |
-
return [float(sp[0]), float(sp[1])]
|
43 |
-
else:
|
44 |
-
print('error', review)
|
45 |
-
return [-1, -1]
|
46 |
-
except Exception as e:
|
47 |
-
print(e)
|
48 |
-
print('error', review)
|
49 |
-
return [-1, -1]
|
50 |
-
|
51 |
-
|
52 |
-
if __name__ == '__main__':
|
53 |
-
parser = argparse.ArgumentParser(description='ChatGPT-based QA evaluation.')
|
54 |
-
parser.add_argument('-q', '--question')
|
55 |
-
parser.add_argument('-c', '--context')
|
56 |
-
parser.add_argument('-a', '--answer-list', nargs='+', default=[])
|
57 |
-
parser.add_argument('-r', '--rule')
|
58 |
-
parser.add_argument('-o', '--output')
|
59 |
-
parser.add_argument('--max-tokens', type=int, default=1024, help='maximum number of tokens produced in the output')
|
60 |
-
args = parser.parse_args()
|
61 |
-
|
62 |
-
f_q = open(os.path.expanduser(args.question))
|
63 |
-
f_ans1 = open(os.path.expanduser(args.answer_list[0]))
|
64 |
-
f_ans2 = open(os.path.expanduser(args.answer_list[1]))
|
65 |
-
rule_dict = json.load(open(os.path.expanduser(args.rule), 'r'))
|
66 |
-
|
67 |
-
if os.path.isfile(os.path.expanduser(args.output)):
|
68 |
-
cur_reviews = [json.loads(line) for line in open(os.path.expanduser(args.output))]
|
69 |
-
else:
|
70 |
-
cur_reviews = []
|
71 |
-
|
72 |
-
review_file = open(f'{args.output}', 'a')
|
73 |
-
|
74 |
-
context_list = [json.loads(line) for line in open(os.path.expanduser(args.context))]
|
75 |
-
image_to_context = {context['image']: context for context in context_list}
|
76 |
-
|
77 |
-
handles = []
|
78 |
-
idx = 0
|
79 |
-
for ques_js, ans1_js, ans2_js in zip(f_q, f_ans1, f_ans2):
|
80 |
-
ques = json.loads(ques_js)
|
81 |
-
ans1 = json.loads(ans1_js)
|
82 |
-
ans2 = json.loads(ans2_js)
|
83 |
-
|
84 |
-
inst = image_to_context[ques['image']]
|
85 |
-
|
86 |
-
if isinstance(inst['caption'], list):
|
87 |
-
cap_str = '\n'.join(inst['caption'])
|
88 |
-
else:
|
89 |
-
cap_str = inst['caption']
|
90 |
-
|
91 |
-
category = 'llava_bench_' + json.loads(ques_js)['category']
|
92 |
-
if category in rule_dict:
|
93 |
-
rule = rule_dict[category]
|
94 |
-
else:
|
95 |
-
assert False, f"Visual QA category not found in rule file: {category}."
|
96 |
-
prompt = rule['prompt']
|
97 |
-
role = rule['role']
|
98 |
-
content = (f'[Context]\n{cap_str}\n\n'
|
99 |
-
f'[Question]\n{ques["text"]}\n\n'
|
100 |
-
f'[{role} 1]\n{ans1["text"]}\n\n[End of {role} 1]\n\n'
|
101 |
-
f'[{role} 2]\n{ans2["text"]}\n\n[End of {role} 2]\n\n'
|
102 |
-
f'[System]\n{prompt}\n\n')
|
103 |
-
cur_js = {
|
104 |
-
'id': idx+1,
|
105 |
-
'question_id': ques['question_id'],
|
106 |
-
'answer1_id': ans1.get('answer_id', ans1['question_id']),
|
107 |
-
'answer2_id': ans2.get('answer_id', ans2['answer_id']),
|
108 |
-
'category': category
|
109 |
-
}
|
110 |
-
if idx >= len(cur_reviews):
|
111 |
-
review = get_eval(content, args.max_tokens)
|
112 |
-
scores = parse_score(review)
|
113 |
-
cur_js['content'] = review
|
114 |
-
cur_js['tuple'] = scores
|
115 |
-
review_file.write(json.dumps(cur_js) + '\n')
|
116 |
-
review_file.flush()
|
117 |
-
else:
|
118 |
-
print(f'Skipping {idx} as we already have it.')
|
119 |
-
idx += 1
|
120 |
-
print(idx)
|
121 |
-
review_file.close()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
LLAVA_Biovil/llava/eval/eval_gpt_review_visual.py
DELETED
@@ -1,118 +0,0 @@
|
|
1 |
-
import argparse
|
2 |
-
import json
|
3 |
-
import os
|
4 |
-
|
5 |
-
import openai
|
6 |
-
import time
|
7 |
-
|
8 |
-
NUM_SECONDS_TO_SLEEP = 0.5
|
9 |
-
|
10 |
-
|
11 |
-
def get_eval(content: str, max_tokens: int):
|
12 |
-
while True:
|
13 |
-
try:
|
14 |
-
response = openai.ChatCompletion.create(
|
15 |
-
model='gpt-4-0314',
|
16 |
-
messages=[{
|
17 |
-
'role': 'system',
|
18 |
-
'content': 'You are a helpful and precise assistant for checking the quality of the answer.'
|
19 |
-
}, {
|
20 |
-
'role': 'user',
|
21 |
-
'content': content,
|
22 |
-
}],
|
23 |
-
temperature=0.2, # TODO: figure out which temperature is best for evaluation
|
24 |
-
max_tokens=max_tokens,
|
25 |
-
)
|
26 |
-
break
|
27 |
-
except openai.error.RateLimitError:
|
28 |
-
pass
|
29 |
-
except Exception as e:
|
30 |
-
print(e)
|
31 |
-
time.sleep(NUM_SECONDS_TO_SLEEP)
|
32 |
-
|
33 |
-
return response['choices'][0]['message']['content']
|
34 |
-
|
35 |
-
|
36 |
-
def parse_score(review):
|
37 |
-
try:
|
38 |
-
score_pair = review.split('\n')[0]
|
39 |
-
score_pair = score_pair.replace(',', ' ')
|
40 |
-
sp = score_pair.split(' ')
|
41 |
-
if len(sp) == 2:
|
42 |
-
return [float(sp[0]), float(sp[1])]
|
43 |
-
else:
|
44 |
-
print('error', review)
|
45 |
-
return [-1, -1]
|
46 |
-
except Exception as e:
|
47 |
-
print(e)
|
48 |
-
print('error', review)
|
49 |
-
return [-1, -1]
|
50 |
-
|
51 |
-
|
52 |
-
if __name__ == '__main__':
|
53 |
-
parser = argparse.ArgumentParser(description='ChatGPT-based QA evaluation.')
|
54 |
-
parser.add_argument('-q', '--question')
|
55 |
-
parser.add_argument('-c', '--context')
|
56 |
-
parser.add_argument('-a', '--answer-list', nargs='+', default=[])
|
57 |
-
parser.add_argument('-r', '--rule')
|
58 |
-
parser.add_argument('-o', '--output')
|
59 |
-
parser.add_argument('--max-tokens', type=int, default=1024, help='maximum number of tokens produced in the output')
|
60 |
-
args = parser.parse_args()
|
61 |
-
|
62 |
-
f_q = open(os.path.expanduser(args.question))
|
63 |
-
f_ans1 = open(os.path.expanduser(args.answer_list[0]))
|
64 |
-
f_ans2 = open(os.path.expanduser(args.answer_list[1]))
|
65 |
-
rule_dict = json.load(open(os.path.expanduser(args.rule), 'r'))
|
66 |
-
|
67 |
-
if os.path.isfile(os.path.expanduser(args.output)):
|
68 |
-
cur_reviews = [json.loads(line) for line in open(os.path.expanduser(args.output))]
|
69 |
-
else:
|
70 |
-
cur_reviews = []
|
71 |
-
|
72 |
-
review_file = open(f'{args.output}', 'a')
|
73 |
-
|
74 |
-
context_list = [json.loads(line) for line in open(os.path.expanduser(args.context))]
|
75 |
-
image_to_context = {context['image']: context for context in context_list}
|
76 |
-
|
77 |
-
handles = []
|
78 |
-
idx = 0
|
79 |
-
for ques_js, ans1_js, ans2_js in zip(f_q, f_ans1, f_ans2):
|
80 |
-
ques = json.loads(ques_js)
|
81 |
-
ans1 = json.loads(ans1_js)
|
82 |
-
ans2 = json.loads(ans2_js)
|
83 |
-
|
84 |
-
inst = image_to_context[ques['image']]
|
85 |
-
cap_str = '\n'.join(inst['captions'])
|
86 |
-
box_str = '\n'.join([f'{instance["category"]}: {instance["bbox"]}' for instance in inst['instances']])
|
87 |
-
|
88 |
-
category = json.loads(ques_js)['category']
|
89 |
-
if category in rule_dict:
|
90 |
-
rule = rule_dict[category]
|
91 |
-
else:
|
92 |
-
assert False, f"Visual QA category not found in rule file: {category}."
|
93 |
-
prompt = rule['prompt']
|
94 |
-
role = rule['role']
|
95 |
-
content = (f'[Context]\n{cap_str}\n\n{box_str}\n\n'
|
96 |
-
f'[Question]\n{ques["text"]}\n\n'
|
97 |
-
f'[{role} 1]\n{ans1["text"]}\n\n[End of {role} 1]\n\n'
|
98 |
-
f'[{role} 2]\n{ans2["text"]}\n\n[End of {role} 2]\n\n'
|
99 |
-
f'[System]\n{prompt}\n\n')
|
100 |
-
cur_js = {
|
101 |
-
'id': idx+1,
|
102 |
-
'question_id': ques['question_id'],
|
103 |
-
'answer1_id': ans1.get('answer_id', ans1['question_id']),
|
104 |
-
'answer2_id': ans2.get('answer_id', ans2['answer_id']),
|
105 |
-
'category': category
|
106 |
-
}
|
107 |
-
if idx >= len(cur_reviews):
|
108 |
-
review = get_eval(content, args.max_tokens)
|
109 |
-
scores = parse_score(review)
|
110 |
-
cur_js['content'] = review
|
111 |
-
cur_js['tuple'] = scores
|
112 |
-
review_file.write(json.dumps(cur_js) + '\n')
|
113 |
-
review_file.flush()
|
114 |
-
else:
|
115 |
-
print(f'Skipping {idx} as we already have it.')
|
116 |
-
idx += 1
|
117 |
-
print(idx)
|
118 |
-
review_file.close()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
LLAVA_Biovil/llava/eval/eval_pope.py
DELETED
@@ -1,81 +0,0 @@
|
|
1 |
-
import os
|
2 |
-
import json
|
3 |
-
import argparse
|
4 |
-
|
5 |
-
def eval_pope(answers, label_file):
|
6 |
-
label_list = [json.loads(q)['label'] for q in open(label_file, 'r')]
|
7 |
-
|
8 |
-
for answer in answers:
|
9 |
-
text = answer['text']
|
10 |
-
|
11 |
-
# Only keep the first sentence
|
12 |
-
if text.find('.') != -1:
|
13 |
-
text = text.split('.')[0]
|
14 |
-
|
15 |
-
text = text.replace(',', '')
|
16 |
-
words = text.split(' ')
|
17 |
-
if 'No' in words or 'not' in words or 'no' in words:
|
18 |
-
answer['text'] = 'no'
|
19 |
-
else:
|
20 |
-
answer['text'] = 'yes'
|
21 |
-
|
22 |
-
for i in range(len(label_list)):
|
23 |
-
if label_list[i] == 'no':
|
24 |
-
label_list[i] = 0
|
25 |
-
else:
|
26 |
-
label_list[i] = 1
|
27 |
-
|
28 |
-
pred_list = []
|
29 |
-
for answer in answers:
|
30 |
-
if answer['text'] == 'no':
|
31 |
-
pred_list.append(0)
|
32 |
-
else:
|
33 |
-
pred_list.append(1)
|
34 |
-
|
35 |
-
pos = 1
|
36 |
-
neg = 0
|
37 |
-
yes_ratio = pred_list.count(1) / len(pred_list)
|
38 |
-
|
39 |
-
TP, TN, FP, FN = 0, 0, 0, 0
|
40 |
-
for pred, label in zip(pred_list, label_list):
|
41 |
-
if pred == pos and label == pos:
|
42 |
-
TP += 1
|
43 |
-
elif pred == pos and label == neg:
|
44 |
-
FP += 1
|
45 |
-
elif pred == neg and label == neg:
|
46 |
-
TN += 1
|
47 |
-
elif pred == neg and label == pos:
|
48 |
-
FN += 1
|
49 |
-
|
50 |
-
print('TP\tFP\tTN\tFN\t')
|
51 |
-
print('{}\t{}\t{}\t{}'.format(TP, FP, TN, FN))
|
52 |
-
|
53 |
-
precision = float(TP) / float(TP + FP)
|
54 |
-
recall = float(TP) / float(TP + FN)
|
55 |
-
f1 = 2*precision*recall / (precision + recall)
|
56 |
-
acc = (TP + TN) / (TP + TN + FP + FN)
|
57 |
-
print('Accuracy: {}'.format(acc))
|
58 |
-
print('Precision: {}'.format(precision))
|
59 |
-
print('Recall: {}'.format(recall))
|
60 |
-
print('F1 score: {}'.format(f1))
|
61 |
-
print('Yes ratio: {}'.format(yes_ratio))
|
62 |
-
print('%.3f, %.3f, %.3f, %.3f, %.3f' % (f1, acc, precision, recall, yes_ratio) )
|
63 |
-
|
64 |
-
if __name__ == "__main__":
|
65 |
-
parser = argparse.ArgumentParser()
|
66 |
-
parser.add_argument("--annotation-dir", type=str)
|
67 |
-
parser.add_argument("--question-file", type=str)
|
68 |
-
parser.add_argument("--result-file", type=str)
|
69 |
-
args = parser.parse_args()
|
70 |
-
|
71 |
-
questions = [json.loads(line) for line in open(args.question_file)]
|
72 |
-
questions = {question['question_id']: question for question in questions}
|
73 |
-
answers = [json.loads(q) for q in open(args.result_file)]
|
74 |
-
for file in os.listdir(args.annotation_dir):
|
75 |
-
assert file.startswith('coco_pope_')
|
76 |
-
assert file.endswith('.json')
|
77 |
-
category = file[10:-5]
|
78 |
-
cur_answers = [x for x in answers if questions[x['question_id']]['category'] == category]
|
79 |
-
print('Category: {}, # samples: {}'.format(category, len(cur_answers)))
|
80 |
-
eval_pope(cur_answers, os.path.join(args.annotation_dir, file))
|
81 |
-
print("====================================")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
LLAVA_Biovil/llava/eval/eval_science_qa.py
DELETED
@@ -1,114 +0,0 @@
|
|
1 |
-
import argparse
|
2 |
-
import json
|
3 |
-
import os
|
4 |
-
import re
|
5 |
-
import random
|
6 |
-
|
7 |
-
|
8 |
-
def get_args():
|
9 |
-
parser = argparse.ArgumentParser()
|
10 |
-
parser.add_argument('--base-dir', type=str)
|
11 |
-
parser.add_argument('--result-file', type=str)
|
12 |
-
parser.add_argument('--output-file', type=str)
|
13 |
-
parser.add_argument('--output-result', type=str)
|
14 |
-
parser.add_argument('--split', type=str, default='test')
|
15 |
-
parser.add_argument('--options', type=list, default=["A", "B", "C", "D", "E"])
|
16 |
-
return parser.parse_args()
|
17 |
-
|
18 |
-
|
19 |
-
def convert_caps(results):
|
20 |
-
fakecaps = []
|
21 |
-
for result in results:
|
22 |
-
image_id = result['question_id']
|
23 |
-
caption = result['text']
|
24 |
-
fakecaps.append({"image_id": int(image_id), "caption": caption})
|
25 |
-
return fakecaps
|
26 |
-
|
27 |
-
|
28 |
-
def get_pred_idx(prediction, choices, options):
|
29 |
-
"""
|
30 |
-
Get the index (e.g. 2) from the prediction (e.g. 'C')
|
31 |
-
"""
|
32 |
-
if prediction in options[:len(choices)]:
|
33 |
-
return options.index(prediction)
|
34 |
-
else:
|
35 |
-
return -1
|
36 |
-
return random.choice(range(len(choices)))
|
37 |
-
|
38 |
-
|
39 |
-
if __name__ == "__main__":
|
40 |
-
args = get_args()
|
41 |
-
|
42 |
-
base_dir = args.base_dir
|
43 |
-
split_indices = json.load(open(os.path.join(base_dir, "pid_splits.json")))[args.split]
|
44 |
-
problems = json.load(open(os.path.join(base_dir, "problems.json")))
|
45 |
-
predictions = [json.loads(line) for line in open(args.result_file)]
|
46 |
-
predictions = {pred['question_id']: pred for pred in predictions}
|
47 |
-
split_problems = {idx: problems[idx] for idx in split_indices}
|
48 |
-
|
49 |
-
results = {'correct': [], 'incorrect': []}
|
50 |
-
sqa_results = {}
|
51 |
-
sqa_results['acc'] = None
|
52 |
-
sqa_results['correct'] = None
|
53 |
-
sqa_results['count'] = None
|
54 |
-
sqa_results['results'] = {}
|
55 |
-
sqa_results['outputs'] = {}
|
56 |
-
|
57 |
-
for prob_id, prob in split_problems.items():
|
58 |
-
if prob_id not in predictions:
|
59 |
-
pred = {'text': 'FAILED', 'prompt': 'Unknown'}
|
60 |
-
pred_text = 'FAILED'
|
61 |
-
else:
|
62 |
-
pred = predictions[prob_id]
|
63 |
-
pred_text = pred['text']
|
64 |
-
|
65 |
-
if pred_text in args.options:
|
66 |
-
answer = pred_text
|
67 |
-
elif len(pred_text) >= 3 and pred_text[0] in args.options and pred_text[1:3] == ". ":
|
68 |
-
answer = pred_text[0]
|
69 |
-
else:
|
70 |
-
pattern = re.compile(r'The answer is ([A-Z]).')
|
71 |
-
res = pattern.findall(pred_text)
|
72 |
-
if len(res) == 1:
|
73 |
-
answer = res[0] # 'A', 'B', ...
|
74 |
-
else:
|
75 |
-
answer = "FAILED"
|
76 |
-
|
77 |
-
pred_idx = get_pred_idx(answer, prob['choices'], args.options)
|
78 |
-
|
79 |
-
analysis = {
|
80 |
-
'question_id': prob_id,
|
81 |
-
'parsed_ans': answer,
|
82 |
-
'ground_truth': args.options[prob['answer']],
|
83 |
-
'question': pred['prompt'],
|
84 |
-
'pred': pred_text,
|
85 |
-
'is_multimodal': '<image>' in pred['prompt'],
|
86 |
-
}
|
87 |
-
|
88 |
-
sqa_results['results'][prob_id] = get_pred_idx(answer, prob['choices'], args.options)
|
89 |
-
sqa_results['outputs'][prob_id] = pred_text
|
90 |
-
|
91 |
-
if pred_idx == prob['answer']:
|
92 |
-
results['correct'].append(analysis)
|
93 |
-
else:
|
94 |
-
results['incorrect'].append(analysis)
|
95 |
-
|
96 |
-
correct = len(results['correct'])
|
97 |
-
total = len(results['correct']) + len(results['incorrect'])
|
98 |
-
|
99 |
-
###### IMG ######
|
100 |
-
multimodal_correct = len([x for x in results['correct'] if x['is_multimodal']])
|
101 |
-
multimodal_incorrect = len([x for x in results['incorrect'] if x['is_multimodal']])
|
102 |
-
multimodal_total = multimodal_correct + multimodal_incorrect
|
103 |
-
###### IMG ######
|
104 |
-
|
105 |
-
print(f'Total: {total}, Correct: {correct}, Accuracy: {correct / total * 100:.2f}%, IMG-Accuracy: {multimodal_correct / multimodal_total * 100:.2f}%')
|
106 |
-
|
107 |
-
sqa_results['acc'] = correct / total * 100
|
108 |
-
sqa_results['correct'] = correct
|
109 |
-
sqa_results['count'] = total
|
110 |
-
|
111 |
-
with open(args.output_file, 'w') as f:
|
112 |
-
json.dump(results, f, indent=2)
|
113 |
-
with open(args.output_result, 'w') as f:
|
114 |
-
json.dump(sqa_results, f, indent=2)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
LLAVA_Biovil/llava/eval/eval_science_qa_gpt4.py
DELETED
@@ -1,104 +0,0 @@
|
|
1 |
-
import argparse
|
2 |
-
import json
|
3 |
-
import os
|
4 |
-
import re
|
5 |
-
import random
|
6 |
-
from collections import defaultdict
|
7 |
-
|
8 |
-
|
9 |
-
def get_args():
|
10 |
-
parser = argparse.ArgumentParser()
|
11 |
-
parser.add_argument('--base-dir', type=str)
|
12 |
-
parser.add_argument('--gpt4-result', type=str)
|
13 |
-
parser.add_argument('--our-result', type=str)
|
14 |
-
parser.add_argument('--split', type=str, default='test')
|
15 |
-
parser.add_argument('--options', type=list, default=["A", "B", "C", "D", "E"])
|
16 |
-
return parser.parse_args()
|
17 |
-
|
18 |
-
|
19 |
-
def convert_caps(results):
|
20 |
-
fakecaps = []
|
21 |
-
for result in results:
|
22 |
-
image_id = result['question_id']
|
23 |
-
caption = result['text']
|
24 |
-
fakecaps.append({"image_id": int(image_id), "caption": caption})
|
25 |
-
return fakecaps
|
26 |
-
|
27 |
-
|
28 |
-
def get_pred_idx(prediction, choices, options):
|
29 |
-
"""
|
30 |
-
Get the index (e.g. 2) from the prediction (e.g. 'C')
|
31 |
-
"""
|
32 |
-
if prediction in options[:len(choices)]:
|
33 |
-
return options.index(prediction)
|
34 |
-
else:
|
35 |
-
return random.choice(range(len(choices)))
|
36 |
-
|
37 |
-
|
38 |
-
if __name__ == "__main__":
|
39 |
-
args = get_args()
|
40 |
-
|
41 |
-
base_dir = args.base_dir
|
42 |
-
split_indices = json.load(open(os.path.join(base_dir, "pid_splits.json")))[args.split]
|
43 |
-
problems = json.load(open(os.path.join(base_dir, "problems.json")))
|
44 |
-
our_predictions = [json.loads(line) for line in open(args.our_result)]
|
45 |
-
our_predictions = {pred['question_id']: pred for pred in our_predictions}
|
46 |
-
split_problems = {idx: problems[idx] for idx in split_indices}
|
47 |
-
|
48 |
-
gpt4_predictions = json.load(open(args.gpt4_result))['outputs']
|
49 |
-
|
50 |
-
results = defaultdict(lambda: 0)
|
51 |
-
|
52 |
-
for prob_id, prob in split_problems.items():
|
53 |
-
if prob_id not in our_predictions:
|
54 |
-
continue
|
55 |
-
if prob_id not in gpt4_predictions:
|
56 |
-
continue
|
57 |
-
our_pred = our_predictions[prob_id]['text']
|
58 |
-
gpt4_pred = gpt4_predictions[prob_id]
|
59 |
-
|
60 |
-
pattern = re.compile(r'The answer is ([A-Z]).')
|
61 |
-
our_res = pattern.findall(our_pred)
|
62 |
-
if len(our_res) == 1:
|
63 |
-
our_answer = our_res[0] # 'A', 'B', ...
|
64 |
-
else:
|
65 |
-
our_answer = "FAILED"
|
66 |
-
gpt4_res = pattern.findall(gpt4_pred)
|
67 |
-
if len(gpt4_res) == 1:
|
68 |
-
gpt4_answer = gpt4_res[0] # 'A', 'B', ...
|
69 |
-
else:
|
70 |
-
gpt4_answer = "FAILED"
|
71 |
-
|
72 |
-
our_pred_idx = get_pred_idx(our_answer, prob['choices'], args.options)
|
73 |
-
gpt4_pred_idx = get_pred_idx(gpt4_answer, prob['choices'], args.options)
|
74 |
-
|
75 |
-
if gpt4_answer == 'FAILED':
|
76 |
-
results['gpt4_failed'] += 1
|
77 |
-
# continue
|
78 |
-
gpt4_pred_idx = our_pred_idx
|
79 |
-
# if our_pred_idx != prob['answer']:
|
80 |
-
# print(our_predictions[prob_id]['prompt'])
|
81 |
-
# print('-----------------')
|
82 |
-
# print(f'LECTURE: {prob["lecture"]}')
|
83 |
-
# print(f'SOLUTION: {prob["solution"]}')
|
84 |
-
# print('=====================')
|
85 |
-
else:
|
86 |
-
# continue
|
87 |
-
pass
|
88 |
-
# gpt4_pred_idx = our_pred_idx
|
89 |
-
|
90 |
-
if gpt4_pred_idx == prob['answer']:
|
91 |
-
results['correct'] += 1
|
92 |
-
else:
|
93 |
-
results['incorrect'] += 1
|
94 |
-
|
95 |
-
|
96 |
-
if gpt4_pred_idx == prob['answer'] or our_pred_idx == prob['answer']:
|
97 |
-
results['correct_upperbound'] += 1
|
98 |
-
|
99 |
-
correct = results['correct']
|
100 |
-
total = results['correct'] + results['incorrect']
|
101 |
-
print(f'Total: {total}, Correct: {correct}, Accuracy: {correct / total * 100:.2f}%')
|
102 |
-
print(f'Total: {total}, Correct (upper): {results["correct_upperbound"]}, Accuracy: {results["correct_upperbound"] / total * 100:.2f}%')
|
103 |
-
print(f'Total: {total}, GPT-4 NO-ANS (RANDOM): {results["gpt4_failed"]}, Percentage: {results["gpt4_failed"] / total * 100:.2f}%')
|
104 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
LLAVA_Biovil/llava/eval/eval_science_qa_gpt4_requery.py
DELETED
@@ -1,149 +0,0 @@
|
|
1 |
-
import argparse
|
2 |
-
import json
|
3 |
-
import os
|
4 |
-
import re
|
5 |
-
import random
|
6 |
-
from collections import defaultdict
|
7 |
-
|
8 |
-
|
9 |
-
def get_args():
|
10 |
-
parser = argparse.ArgumentParser()
|
11 |
-
parser.add_argument('--base-dir', type=str)
|
12 |
-
parser.add_argument('--gpt4-result', type=str)
|
13 |
-
parser.add_argument('--requery-result', type=str)
|
14 |
-
parser.add_argument('--our-result', type=str)
|
15 |
-
parser.add_argument('--output-result', type=str)
|
16 |
-
parser.add_argument('--split', type=str, default='test')
|
17 |
-
parser.add_argument('--options', type=list, default=["A", "B", "C", "D", "E"])
|
18 |
-
return parser.parse_args()
|
19 |
-
|
20 |
-
|
21 |
-
def convert_caps(results):
|
22 |
-
fakecaps = []
|
23 |
-
for result in results:
|
24 |
-
image_id = result['question_id']
|
25 |
-
caption = result['text']
|
26 |
-
fakecaps.append({"image_id": int(image_id), "caption": caption})
|
27 |
-
return fakecaps
|
28 |
-
|
29 |
-
|
30 |
-
def get_pred_idx(prediction, choices, options):
    """
    Get the index (e.g. 2) from the prediction (e.g. 'C').

    Falls back to a uniformly random valid index when the prediction is not
    one of the option letters available for this question.
    """
    valid_letters = options[:len(choices)]
    if prediction not in valid_letters:
        return random.choice(range(len(choices)))
    return options.index(prediction)
|
38 |
-
|
39 |
-
|
40 |
-
# Entry point: compare our model, GPT-4, and requery predictions on a
# ScienceQA split and dump the combined results to a JSON file.
if __name__ == "__main__":
    args = get_args()

    # Load problem metadata for the requested split.
    base_dir = args.base_dir
    split_indices = json.load(open(os.path.join(base_dir, "pid_splits.json")))[args.split]
    problems = json.load(open(os.path.join(base_dir, "problems.json")))
    # One JSON object per line, keyed by question id for O(1) lookup.
    our_predictions = [json.loads(line) for line in open(args.our_result)]
    our_predictions = {pred['question_id']: pred for pred in our_predictions}
    split_problems = {idx: problems[idx] for idx in split_indices}

    # Second-round ("requery") predictions, also keyed by question id.
    requery_predictions = [json.loads(line) for line in open(args.requery_result)]
    requery_predictions = {pred['question_id']: pred for pred in requery_predictions}

    # GPT-4 result file is a single JSON document with an 'outputs' mapping.
    gpt4_predictions = json.load(open(args.gpt4_result))['outputs']

    # Metric counters; unseen keys default to 0.
    results = defaultdict(lambda: 0)

    # Accumulator for the ScienceQA-format output file.
    sqa_results = {}
    sqa_results['acc'] = None
    sqa_results['correct'] = None
    sqa_results['count'] = None
    sqa_results['results'] = {}
    sqa_results['outputs'] = {}

    for prob_id, prob in split_problems.items():
        # Every split problem must have an "ours" and a GPT-4 prediction.
        if prob_id not in our_predictions:
            assert False
        if prob_id not in gpt4_predictions:
            assert False
        our_pred = our_predictions[prob_id]['text']
        gpt4_pred = gpt4_predictions[prob_id]
        # Requery predictions may be missing; count and use a sentinel.
        if prob_id not in requery_predictions:
            results['missing_requery'] += 1
            requery_pred = "MISSING"
        else:
            requery_pred = requery_predictions[prob_id]['text']

        # Extract the answer letter from free-form text; exactly one match
        # is required, otherwise the prediction is treated as FAILED (and
        # get_pred_idx later falls back to a random choice).
        pattern = re.compile(r'The answer is ([A-Z]).')
        our_res = pattern.findall(our_pred)
        if len(our_res) == 1:
            our_answer = our_res[0]  # 'A', 'B', ...
        else:
            our_answer = "FAILED"

        requery_res = pattern.findall(requery_pred)
        if len(requery_res) == 1:
            requery_answer = requery_res[0]  # 'A', 'B', ...
        else:
            requery_answer = "FAILED"

        gpt4_res = pattern.findall(gpt4_pred)
        if len(gpt4_res) == 1:
            gpt4_answer = gpt4_res[0]  # 'A', 'B', ...
        else:
            gpt4_answer = "FAILED"

        our_pred_idx = get_pred_idx(our_answer, prob['choices'], args.options)
        gpt4_pred_idx = get_pred_idx(gpt4_answer, prob['choices'], args.options)
        requery_pred_idx = get_pred_idx(requery_answer, prob['choices'], args.options)

        results['total'] += 1

        # GPT-4 buckets: when GPT-4 failed to produce a letter, its random
        # fallback may still be correct; "ourvisual" credits our model's
        # answer as a visual backup for GPT-4 failures.
        if gpt4_answer == 'FAILED':
            results['gpt4_failed'] += 1
            if gpt4_pred_idx == prob['answer']:
                results['gpt4_correct'] += 1
            if our_pred_idx == prob['answer']:
                results['gpt4_ourvisual_correct'] += 1
        elif gpt4_pred_idx == prob['answer']:
            results['gpt4_correct'] += 1
            results['gpt4_ourvisual_correct'] += 1

        if our_pred_idx == prob['answer']:
            results['our_correct'] += 1

        # Requery metric: fall back to our answer when the requery failed;
        # print a debug dump for requery answers that are wrong.
        if requery_answer == 'FAILED':
            sqa_results['results'][prob_id] = our_pred_idx
            if our_pred_idx == prob['answer']:
                results['requery_correct'] += 1
        else:
            sqa_results['results'][prob_id] = requery_pred_idx
            if requery_pred_idx == prob['answer']:
                results['requery_correct'] += 1
            else:
                # NOTE(review): the literal `print("=====...")` line below is
                # INSIDE the f-string, so it is printed as text rather than
                # executed — looks like a leftover typo; confirm before relying
                # on the output format.
                print(f"""
Question ({args.options[prob['answer']]}): {our_predictions[prob_id]['prompt']}
Our ({our_answer}): {our_pred}
GPT-4 ({gpt4_answer}): {gpt4_pred}
Requery ({requery_answer}): {requery_pred}
print("=====================================")
""")

        # Upper bound: correct if either GPT-4 or our model got it right.
        if gpt4_pred_idx == prob['answer'] or our_pred_idx == prob['answer']:
            results['correct_upperbound'] += 1

    total = results['total']
    print(f'Total: {total}, Our-Correct: {results["our_correct"]}, Accuracy: {results["our_correct"] / total * 100:.2f}%')
    print(f'Total: {total}, GPT-4-Correct: {results["gpt4_correct"]}, Accuracy: {results["gpt4_correct"] / total * 100:.2f}%')
    print(f'Total: {total}, GPT-4 NO-ANS (RANDOM): {results["gpt4_failed"]}, Percentage: {results["gpt4_failed"] / total * 100:.2f}%')
    print(f'Total: {total}, GPT-4-OursVisual-Correct: {results["gpt4_ourvisual_correct"]}, Accuracy: {results["gpt4_ourvisual_correct"] / total * 100:.2f}%')
    print(f'Total: {total}, Requery-Correct: {results["requery_correct"]}, Accuracy: {results["requery_correct"] / total * 100:.2f}%')
    print(f'Total: {total}, Correct upper: {results["correct_upperbound"]}, Accuracy: {results["correct_upperbound"] / total * 100:.2f}%')

    # Final headline numbers use the requery metric.
    sqa_results['acc'] = results["requery_correct"] / total * 100
    sqa_results['correct'] = results["requery_correct"]
    sqa_results['count'] = total

    with open(args.output_result, 'w') as f:
        json.dump(sqa_results, f, indent=2)
|
149 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
LLAVA_Biovil/llava/eval/eval_textvqa.py
DELETED
@@ -1,65 +0,0 @@
|
|
1 |
-
import os
|
2 |
-
import argparse
|
3 |
-
import json
|
4 |
-
import re
|
5 |
-
|
6 |
-
from LLAV.llava.eval.m4c_evaluator import TextVQAAccuracyEvaluator
|
7 |
-
|
8 |
-
|
9 |
-
def get_args():
    """Parse CLI arguments: the annotation file plus either a single result
    file, a directory of result files, or both."""
    arg_parser = argparse.ArgumentParser()
    for flag in ('--annotation-file', '--result-file', '--result-dir'):
        arg_parser.add_argument(flag, type=str)
    return arg_parser.parse_args()
|
15 |
-
|
16 |
-
|
17 |
-
def prompt_processor(prompt):
    """Extract the lower-cased question text from a TextVQA prompt.

    Supports three prompt layouts:
      * "OCR tokens: ..." prompts, where the question is delimited by
        "Question: ... Short answer:";
      * three-line prompts containing "Reference OCR token: " (the question
        is on the line before or after the OCR line);
      * plain two-line prompts (question on the first line).

    Raises:
        ValueError: if the prompt matches none of the known layouts.
    """
    lines = prompt.split('\n')
    if prompt.startswith('OCR tokens: '):
        pattern = r"Question: (.*?) Short answer:"
        match = re.search(pattern, prompt, re.DOTALL)
        question = match.group(1)
    elif 'Reference OCR token: ' in prompt and len(lines) == 3:
        if prompt.startswith('Reference OCR token:'):
            question = lines[1]
        else:
            question = lines[0]
    elif len(lines) == 2:
        question = lines[0]
    else:
        # BUG FIX: `assert False` is stripped under `python -O`, silently
        # letting an unbound `question` raise a confusing NameError; raise
        # an explicit error instead.
        raise ValueError(f'Unrecognized prompt format: {prompt!r}')

    return question.lower()
|
33 |
-
|
34 |
-
|
35 |
-
def eval_single(annotation_file, result_file):
    """Score one JSONL result file against TextVQA annotations and print the
    soft (EvalAI-style) accuracy."""
    experiment_name = os.path.splitext(os.path.basename(result_file))[0]
    print(experiment_name)
    raw_annotations = json.load(open(annotation_file))['data']
    # Index ground truth by (image_id, lower-cased question) for lookup.
    indexed = {
        (ann['image_id'], ann['question'].lower()): ann
        for ann in raw_annotations
    }
    predictions = [json.loads(line) for line in open(result_file)]

    pred_list = [
        {
            "pred_answer": pred['text'],
            "gt_answers": indexed[(pred['question_id'], prompt_processor(pred['prompt']))]['answers'],
        }
        for pred in predictions
    ]

    evaluator = TextVQAAccuracyEvaluator()
    print('Samples: {}\nAccuracy: {:.2f}%\n'.format(len(pred_list), 100. * evaluator.eval_pred_list(pred_list)))
|
52 |
-
|
53 |
-
|
54 |
-
# Entry point: evaluate a single result file and/or every .jsonl file in a
# result directory against the same annotation file.
if __name__ == "__main__":
    args = get_args()

    if args.result_file is not None:
        eval_single(args.annotation_file, args.result_file)

    if args.result_dir is not None:
        # Sorted for deterministic, alphabetical reporting order.
        for result_file in sorted(os.listdir(args.result_dir)):
            if not result_file.endswith('.jsonl'):
                print(f'Skipping {result_file}')
                continue
            eval_single(args.annotation_file, os.path.join(args.result_dir, result_file))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
LLAVA_Biovil/llava/eval/generate_webpage_data_from_table.py
DELETED
@@ -1,111 +0,0 @@
|
|
1 |
-
"""Generate json file for webpage."""
|
2 |
-
import json
|
3 |
-
import os
|
4 |
-
import re
|
5 |
-
|
6 |
-
# models = ['llama', 'alpaca', 'gpt35', 'bard']
|
7 |
-
models = ['vicuna']
|
8 |
-
|
9 |
-
|
10 |
-
def read_jsonl(path: str, key: str = None):
    """Read a JSONL file into a list of records.

    When *key* is given, the records are instead returned as a dict mapping
    ``record[key]`` to the record (built in ``key``-sorted order).
    """
    with open(os.path.expanduser(path)) as handle:
        records = [json.loads(raw) for raw in handle if raw]
    if key is None:
        return records
    records.sort(key=lambda rec: rec[key])
    return {rec[key]: rec for rec in records}
|
21 |
-
|
22 |
-
|
23 |
-
def trim_hanging_lines(s: str, n: int) -> str:
    """Strip *s*, then drop its first *n* lines, re-stripping after each drop."""
    remaining = s.strip()
    count = n
    while count > 0:
        remaining = remaining.split('\n', 1)[1].strip()
        count -= 1
    return remaining
|
28 |
-
|
29 |
-
|
30 |
-
# Entry point: merge questions, model answers, and GPT reviews into a single
# data.json consumed by the comparison webpage.
if __name__ == '__main__':
    questions = read_jsonl('table/question.jsonl', key='question_id')

    # alpaca_answers = read_jsonl('table/answer/answer_alpaca-13b.jsonl', key='question_id')
    # bard_answers = read_jsonl('table/answer/answer_bard.jsonl', key='question_id')
    # gpt35_answers = read_jsonl('table/answer/answer_gpt35.jsonl', key='question_id')
    # llama_answers = read_jsonl('table/answer/answer_llama-13b.jsonl', key='question_id')
    vicuna_answers = read_jsonl('table/answer/answer_vicuna-13b.jsonl', key='question_id')
    ours_answers = read_jsonl('table/results/llama-13b-hf-alpaca.jsonl', key='question_id')

    review_vicuna = read_jsonl('table/review/review_vicuna-13b_llama-13b-hf-alpaca.jsonl', key='question_id')
    # review_alpaca = read_jsonl('table/review/review_alpaca-13b_vicuna-13b.jsonl', key='question_id')
    # review_bard = read_jsonl('table/review/review_bard_vicuna-13b.jsonl', key='question_id')
    # review_gpt35 = read_jsonl('table/review/review_gpt35_vicuna-13b.jsonl', key='question_id')
    # review_llama = read_jsonl('table/review/review_llama-13b_vicuna-13b.jsonl', key='question_id')

    # One record per question: the question text, every model's answer, the
    # review text, and the numeric review scores.
    records = []
    for qid in questions.keys():
        r = {
            'id': qid,
            'category': questions[qid]['category'],
            'question': questions[qid]['text'],
            'answers': {
                # 'alpaca': alpaca_answers[qid]['text'],
                # 'llama': llama_answers[qid]['text'],
                # 'bard': bard_answers[qid]['text'],
                # 'gpt35': gpt35_answers[qid]['text'],
                'vicuna': vicuna_answers[qid]['text'],
                'ours': ours_answers[qid]['text'],
            },
            'evaluations': {
                # 'alpaca': review_alpaca[qid]['text'],
                # 'llama': review_llama[qid]['text'],
                # 'bard': review_bard[qid]['text'],
                'vicuna': review_vicuna[qid]['content'],
                # 'gpt35': review_gpt35[qid]['text'],
            },
            'scores': {
                'vicuna': review_vicuna[qid]['tuple'],
                # 'alpaca': review_alpaca[qid]['score'],
                # 'llama': review_llama[qid]['score'],
                # 'bard': review_bard[qid]['score'],
                # 'gpt35': review_gpt35[qid]['score'],
            },
        }

        # cleanup data
        cleaned_evals = {}
        for k, v in r['evaluations'].items():
            v = v.strip()
            lines = v.split('\n')
            # trim the first line if it's a pair of numbers
            if re.match(r'\d+[, ]+\d+', lines[0]):
                lines = lines[1:]
            v = '\n'.join(lines)
            # Bold the assistant labels for markdown rendering.
            cleaned_evals[k] = v.replace('Assistant 1', "**Assistant 1**").replace('Assistant 2', '**Assistant 2**')

        r['evaluations'] = cleaned_evals
        records.append(r)

    # Reorder the records, this is optional
    # (three hard-coded id permutations that shuffle the display order).
    for r in records:
        if r['id'] <= 20:
            r['id'] += 60
        else:
            r['id'] -= 20
    for r in records:
        if r['id'] <= 50:
            r['id'] += 10
        elif 50 < r['id'] <= 60:
            r['id'] -= 50
    for r in records:
        if r['id'] == 7:
            r['id'] = 1
        elif r['id'] < 7:
            r['id'] += 1

    records.sort(key=lambda x: x['id'])

    # Write to file
    with open('webpage/data.json', 'w') as f:
        json.dump({'questions': records, 'models': models}, f, indent=2)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
LLAVA_Biovil/llava/eval/m4c_evaluator.py
DELETED
@@ -1,334 +0,0 @@
|
|
1 |
-
# Copyright (c) Facebook, Inc. and its affiliates.
|
2 |
-
import re
|
3 |
-
|
4 |
-
from tqdm import tqdm
|
5 |
-
|
6 |
-
|
7 |
-
class EvalAIAnswerProcessor:
    """
    Processes an answer similar to Eval AI
    copied from
    https://github.com/facebookresearch/mmf/blob/c46b3b3391275b4181567db80943473a89ab98ab/pythia/tasks/processors.py#L897

    Normalization pipeline (see ``__call__``): lower-case and de-punctuate the
    answer, map number words to digits, drop articles, and expand common
    missing-apostrophe contractions, matching the official EvalAI scorer.
    """

    # Missing-apostrophe word -> canonical contraction.
    CONTRACTIONS = {
        "aint": "ain't",
        "arent": "aren't",
        "cant": "can't",
        "couldve": "could've",
        "couldnt": "couldn't",
        "couldn'tve": "couldn't've",
        "couldnt've": "couldn't've",
        "didnt": "didn't",
        "doesnt": "doesn't",
        "dont": "don't",
        "hadnt": "hadn't",
        "hadnt've": "hadn't've",
        "hadn'tve": "hadn't've",
        "hasnt": "hasn't",
        "havent": "haven't",
        "hed": "he'd",
        "hed've": "he'd've",
        "he'dve": "he'd've",
        "hes": "he's",
        "howd": "how'd",
        "howll": "how'll",
        "hows": "how's",
        "Id've": "I'd've",
        "I'dve": "I'd've",
        "Im": "I'm",
        "Ive": "I've",
        "isnt": "isn't",
        "itd": "it'd",
        "itd've": "it'd've",
        "it'dve": "it'd've",
        "itll": "it'll",
        "let's": "let's",
        "maam": "ma'am",
        "mightnt": "mightn't",
        "mightnt've": "mightn't've",
        "mightn'tve": "mightn't've",
        "mightve": "might've",
        "mustnt": "mustn't",
        "mustve": "must've",
        "neednt": "needn't",
        "notve": "not've",
        "oclock": "o'clock",
        "oughtnt": "oughtn't",
        "ow's'at": "'ow's'at",
        "'ows'at": "'ow's'at",
        "'ow'sat": "'ow's'at",
        "shant": "shan't",
        "shed've": "she'd've",
        "she'dve": "she'd've",
        "she's": "she's",
        "shouldve": "should've",
        "shouldnt": "shouldn't",
        "shouldnt've": "shouldn't've",
        "shouldn'tve": "shouldn't've",
        "somebody'd": "somebodyd",
        "somebodyd've": "somebody'd've",
        "somebody'dve": "somebody'd've",
        "somebodyll": "somebody'll",
        "somebodys": "somebody's",
        "someoned": "someone'd",
        "someoned've": "someone'd've",
        "someone'dve": "someone'd've",
        "someonell": "someone'll",
        "someones": "someone's",
        "somethingd": "something'd",
        "somethingd've": "something'd've",
        "something'dve": "something'd've",
        "somethingll": "something'll",
        "thats": "that's",
        "thered": "there'd",
        "thered've": "there'd've",
        "there'dve": "there'd've",
        "therere": "there're",
        "theres": "there's",
        "theyd": "they'd",
        "theyd've": "they'd've",
        "they'dve": "they'd've",
        "theyll": "they'll",
        "theyre": "they're",
        "theyve": "they've",
        "twas": "'twas",
        "wasnt": "wasn't",
        "wed've": "we'd've",
        "we'dve": "we'd've",
        "weve": "we've",
        "werent": "weren't",
        "whatll": "what'll",
        "whatre": "what're",
        "whats": "what's",
        "whatve": "what've",
        "whens": "when's",
        "whered": "where'd",
        "wheres": "where's",
        "whereve": "where've",
        "whod": "who'd",
        "whod've": "who'd've",
        "who'dve": "who'd've",
        "wholl": "who'll",
        "whos": "who's",
        "whove": "who've",
        "whyll": "why'll",
        "whyre": "why're",
        "whys": "why's",
        "wont": "won't",
        "wouldve": "would've",
        "wouldnt": "wouldn't",
        "wouldnt've": "wouldn't've",
        "wouldn'tve": "wouldn't've",
        "yall": "y'all",
        "yall'll": "y'all'll",
        "y'allll": "y'all'll",
        "yall'd've": "y'all'd've",
        "y'alld've": "y'all'd've",
        "y'all'dve": "y'all'd've",
        "youd": "you'd",
        "youd've": "you'd've",
        "you'dve": "you'd've",
        "youll": "you'll",
        "youre": "you're",
        "youve": "you've",
    }

    # Number words mapped to their digit form.
    NUMBER_MAP = {
        "none": "0",
        "zero": "0",
        "one": "1",
        "two": "2",
        "three": "3",
        "four": "4",
        "five": "5",
        "six": "6",
        "seven": "7",
        "eight": "8",
        "nine": "9",
        "ten": "10",
    }
    ARTICLES = ["a", "an", "the"]
    # NOTE(review): `(?!<=\d)` is a negative *lookahead* for the literal
    # characters "<=\d" (always true), almost certainly a typo for the
    # negative lookbehind `(?<!\d)`. Kept as-is to stay score-compatible
    # with the upstream EvalAI implementation, which has the same typo.
    PERIOD_STRIP = re.compile(r"(?!<=\d)(\.)(?!\d)")
    COMMA_STRIP = re.compile(r"(?<=\d)(\,)+(?=\d)")
    PUNCTUATIONS = [
        ";",
        r"/",
        "[",
        "]",
        '"',
        "{",
        "}",
        "(",
        ")",
        "=",
        "+",
        "\\",
        "_",
        "-",
        ">",
        "<",
        "@",
        "`",
        ",",
        "?",
        "!",
    ]

    def __init__(self, *args, **kwargs):
        pass

    def word_tokenize(self, word):
        """Lower-case and drop commas/question marks; isolate possessive 's."""
        word = word.lower()
        word = word.replace(",", "").replace("?", "").replace("'s", " 's")
        return word.strip()

    def process_punctuation(self, in_text):
        """Remove or space-replace punctuation, then strip stray periods."""
        out_text = in_text
        for p in self.PUNCTUATIONS:
            # Punctuation adjacent to a space (or digit-grouping commas) is
            # deleted outright; otherwise it is replaced with a space so the
            # surrounding words stay separated.
            if (p + " " in in_text or " " + p in in_text) or (
                re.search(self.COMMA_STRIP, in_text) is not None
            ):
                out_text = out_text.replace(p, "")
            else:
                out_text = out_text.replace(p, " ")
        # BUG FIX: the original passed `re.UNICODE` (== 32) as the third
        # positional argument of Pattern.sub, which is `count`, silently
        # capping the substitutions at 32; flags belong in re.compile.
        out_text = self.PERIOD_STRIP.sub("", out_text)
        return out_text

    def process_digit_article(self, in_text):
        """Map number words to digits, drop articles, expand contractions."""
        out_text = []
        temp_text = in_text.lower().split()
        for word in temp_text:
            # BUG FIX: the original used NUMBER_MAP.setdefault(word, word),
            # which inserted every word ever processed into the shared
            # class-level dict (unbounded growth, cross-instance mutation).
            # `.get` performs the same lookup without mutating.
            word = self.NUMBER_MAP.get(word, word)
            if word not in self.ARTICLES:
                out_text.append(word)
            else:
                pass
        for word_id, word in enumerate(out_text):
            if word in self.CONTRACTIONS:
                out_text[word_id] = self.CONTRACTIONS[word]
        out_text = " ".join(out_text)
        return out_text

    def __call__(self, item):
        """Run the full normalization pipeline on a raw answer string."""
        item = self.word_tokenize(item)
        item = item.replace("\n", " ").replace("\t", " ").strip()
        item = self.process_punctuation(item)
        item = self.process_digit_article(item)
        return item
|
219 |
-
|
220 |
-
|
221 |
-
class TextVQAAccuracyEvaluator:
    """Soft-accuracy evaluator following the official VQA/EvalAI protocol."""

    def __init__(self):
        self.answer_processor = EvalAIAnswerProcessor()

    def _compute_answer_scores(self, raw_answers):
        """
        compute the accuracy (soft score) of human answers
        """
        normalized = [self.answer_processor(ans) for ans in raw_answers]
        assert len(normalized) == 10
        indexed_gts = list(enumerate(normalized))
        unique_answer_scores = {}

        # Each candidate answer's score is averaged over leave-one-out
        # subsets: min(1, matches-in-remaining-9 / 3).
        for candidate in set(normalized):
            per_gt_accs = []
            for gt_pair in indexed_gts:
                remaining = [pair for pair in indexed_gts if pair != gt_pair]
                hits = [pair for pair in remaining if pair[1] == candidate]
                per_gt_accs.append(min(1, float(len(hits)) / 3))
            unique_answer_scores[candidate] = sum(per_gt_accs) / len(per_gt_accs)

        return unique_answer_scores

    def eval_pred_list(self, pred_list):
        """Return mean soft accuracy over a list of prediction entries."""
        per_sample_scores = []
        for entry in tqdm(pred_list):
            normalized_pred = self.answer_processor(entry["pred_answer"])
            score_table = self._compute_answer_scores(entry["gt_answers"])
            per_sample_scores.append(score_table.get(normalized_pred, 0.0))

        return sum(per_sample_scores) / len(per_sample_scores)
|
258 |
-
|
259 |
-
|
260 |
-
class STVQAAccuracyEvaluator:
    """Exact-match accuracy after EvalAI-style answer normalization."""

    def __init__(self):
        self.answer_processor = EvalAIAnswerProcessor()

    def eval_pred_list(self, pred_list):
        """Return the fraction of predictions matching any normalized GT answer."""
        per_sample = []
        for entry in pred_list:
            normalized_pred = self.answer_processor(entry["pred_answer"])
            normalized_gts = [self.answer_processor(a) for a in entry["gt_answers"]]
            per_sample.append(1.0 if normalized_pred in normalized_gts else 0.0)

        return sum(per_sample) / len(per_sample)
|
274 |
-
|
275 |
-
|
276 |
-
class STVQAANLSEvaluator:
    """Average Normalized Levenshtein Similarity (ANLS) evaluator for ST-VQA."""

    def __init__(self):
        import editdistance  # install with `pip install editdistance`

        self.get_edit_distance = editdistance.eval

    def get_anls(self, s1, s2):
        """Return the ANLS between two strings (similarity below 0.5 scores 0)."""
        s1 = s1.lower().strip()
        s2 = s2.lower().strip()
        longer = max(len(s1), len(s2))
        # BUG FIX: guard against ZeroDivisionError when both strings are
        # empty after normalization; two empty strings are a perfect match.
        if longer == 0:
            return 1.0
        iou = 1 - self.get_edit_distance(s1, s2) / longer
        anls = iou if iou >= 0.5 else 0.0
        return anls

    def eval_pred_list(self, pred_list):
        """Return the mean of each sample's best ANLS over its GT answers."""
        pred_scores = []
        for entry in pred_list:
            anls = max(
                self.get_anls(entry["pred_answer"], gt) for gt in entry["gt_answers"]
            )
            pred_scores.append(anls)

        accuracy = sum(pred_scores) / len(pred_scores)
        return accuracy
|
299 |
-
|
300 |
-
|
301 |
-
class TextCapsBleu4Evaluator:
    """BLEU-4 caption evaluator for TextCaps, backed by pycocoevalcap."""

    def __init__(self):
        # The following script requires Java 1.8.0 and pycocotools installed.
        # The pycocoevalcap can be installed with pip as
        # pip install git+https://github.com/ronghanghu/coco-caption.git@python23
        # Original pycocoevalcap code is at https://github.com/tylin/coco-caption
        # but has no python3 support yet.
        try:
            from pycocoevalcap.bleu.bleu import Bleu
            from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer
        except ModuleNotFoundError:
            print(
                "Please install pycocoevalcap module using "
                "pip install git+https://github.com/ronghanghu/coco-caption.git@python23"  # noqa
            )
            raise

        self.tokenizer = PTBTokenizer()
        self.scorer = Bleu(4)

    def eval_pred_list(self, pred_list):
        """Return corpus BLEU-4 of predicted captions against GT references.

        Each entry needs 'pred_answer' (hypothesis) and 'gt_answers'
        (list of reference captions).
        """
        # Create reference and hypotheses captions.
        gts = {}
        res = {}
        for idx, entry in enumerate(pred_list):
            gts[idx] = [{"caption": a} for a in entry["gt_answers"]]
            res[idx] = [{"caption": entry["pred_answer"]}]

        # PTB-tokenize both sides before scoring (shells out to Java).
        gts = self.tokenizer.tokenize(gts)
        res = self.tokenizer.tokenize(res)
        score, _ = self.scorer.compute_score(gts, res)

        bleu4 = score[3]  # score is (Bleu-1, Bleu-2, Bleu-3, Bleu-4)
        return bleu4
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
LLAVA_Biovil/llava/eval/model_qa.py
DELETED
@@ -1,85 +0,0 @@
|
|
1 |
-
import argparse
|
2 |
-
from transformers import AutoTokenizer, AutoModelForCausalLM, StoppingCriteria
|
3 |
-
import torch
|
4 |
-
import os
|
5 |
-
import json
|
6 |
-
from tqdm import tqdm
|
7 |
-
import shortuuid
|
8 |
-
|
9 |
-
from LLAV.llava.conversation import default_conversation
|
10 |
-
from LLAV.llava.utils import disable_torch_init
|
11 |
-
|
12 |
-
|
13 |
-
# new stopping implementation
|
14 |
-
# new stopping implementation
class KeywordsStoppingCriteria(StoppingCriteria):
    """Stop generation once any keyword string appears in the newly decoded text."""

    def __init__(self, keywords, tokenizer, input_ids):
        self.keywords = keywords
        self.tokenizer = tokenizer
        self.start_len = None
        self.input_ids = input_ids

    def __call__(self, output_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        # First invocation: remember the prompt length; nothing generated yet.
        if self.start_len is None:
            self.start_len = self.input_ids.shape[1]
            return False
        decoded = self.tokenizer.batch_decode(
            output_ids[:, self.start_len:], skip_special_tokens=True
        )[0]
        return any(keyword in decoded for keyword in self.keywords)
|
30 |
-
|
31 |
-
|
32 |
-
@torch.inference_mode()
def eval_model(model_name, questions_file, answers_file):
    """Answer each question in a JSONL file with a causal LM on the GPU and
    stream the answers (JSONL) to *answers_file*."""
    # Model
    disable_torch_init()
    model_name = os.path.expanduser(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
    model = AutoModelForCausalLM.from_pretrained(model_name,
        torch_dtype=torch.float16).cuda()


    ques_file = open(os.path.expanduser(questions_file), "r")
    ans_file = open(os.path.expanduser(answers_file), "w")
    for i, line in enumerate(tqdm(ques_file)):
        # One JSON question object per line.
        idx = json.loads(line)["question_id"]
        qs = json.loads(line)["text"]
        cat = json.loads(line)["category"]
        # Wrap the question in the default conversation template.
        conv = default_conversation.copy()
        conv.append_message(conv.roles[0], qs)
        prompt = conv.get_prompt()
        inputs = tokenizer([prompt])
        input_ids = torch.as_tensor(inputs.input_ids).cuda()
        # Stop generating as soon as the conversation separator appears.
        stopping_criteria = KeywordsStoppingCriteria([conv.sep], tokenizer, input_ids)
        output_ids = model.generate(
            input_ids,
            do_sample=True,
            use_cache=True,
            temperature=0.7,
            max_new_tokens=1024,
            stopping_criteria=[stopping_criteria])
        outputs = tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0]
        # Locate the separator after the prompt; append one if the model
        # stopped (e.g. at max_new_tokens) without emitting it.
        try:
            index = outputs.index(conv.sep, len(prompt))
        except ValueError:
            outputs += conv.sep
            index = outputs.index(conv.sep, len(prompt))

        # Slice out just the assistant reply (skip the role tag and ": ").
        outputs = outputs[len(prompt) + len(conv.roles[1]) + 2:index].strip()
        ans_id = shortuuid.uuid()
        ans_file.write(json.dumps({"question_id": idx,
                                   "text": outputs,
                                   "answer_id": ans_id,
                                   "model_id": model_name,
                                   "metadata": {}}) + "\n")
        # Flush per answer so partial progress survives a crash.
        ans_file.flush()
    ans_file.close()
|
77 |
-
|
78 |
-
# Entry point: run the QA evaluation over a question file with the given model.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--model-name", type=str, default="facebook/opt-350m")
    parser.add_argument("--question-file", type=str, default="tables/question.jsonl")
    parser.add_argument("--answers-file", type=str, default="answer.jsonl")
    args = parser.parse_args()

    eval_model(args.model_name, args.question_file, args.answers_file)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
LLAVA_Biovil/llava/eval/model_vqa.py
DELETED
@@ -1,112 +0,0 @@
|
|
1 |
-
import argparse
|
2 |
-
import torch
|
3 |
-
import os
|
4 |
-
import json
|
5 |
-
from tqdm import tqdm
|
6 |
-
import shortuuid
|
7 |
-
|
8 |
-
from LLAV.llava.constants import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN, DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN
|
9 |
-
from LLAV.llava.conversation import conv_templates, SeparatorStyle
|
10 |
-
from LLAV.llava.model.builder import load_pretrained_model
|
11 |
-
from LLAV.llava.utils import disable_torch_init
|
12 |
-
from LLAV.llava.mm_utils import tokenizer_image_token, get_model_name_from_path, KeywordsStoppingCriteria
|
13 |
-
|
14 |
-
from PIL import Image
|
15 |
-
import math
|
16 |
-
|
17 |
-
|
18 |
-
def split_list(lst, n):
|
19 |
-
"""Split a list into n (roughly) equal-sized chunks"""
|
20 |
-
chunk_size = math.ceil(len(lst) / n) # integer division
|
21 |
-
return [lst[i:i+chunk_size] for i in range(0, len(lst), chunk_size)]
|
22 |
-
|
23 |
-
|
24 |
-
def get_chunk(lst, n, k):
|
25 |
-
chunks = split_list(lst, n)
|
26 |
-
return chunks[k]
|
27 |
-
|
28 |
-
|
29 |
-
def eval_model(args):
|
30 |
-
# Model
|
31 |
-
disable_torch_init()
|
32 |
-
model_path = os.path.expanduser(args.model_path)
|
33 |
-
model_name = get_model_name_from_path(model_path)
|
34 |
-
tokenizer, model, image_processor, context_len = load_pretrained_model(model_path, args.model_base, model_name)
|
35 |
-
|
36 |
-
questions = [json.loads(q) for q in open(os.path.expanduser(args.question_file), "r")]
|
37 |
-
questions = get_chunk(questions, args.num_chunks, args.chunk_idx)
|
38 |
-
answers_file = os.path.expanduser(args.answers_file)
|
39 |
-
os.makedirs(os.path.dirname(answers_file), exist_ok=True)
|
40 |
-
ans_file = open(answers_file, "w")
|
41 |
-
for line in tqdm(questions):
|
42 |
-
idx = line["question_id"]
|
43 |
-
image_file = line["image"]
|
44 |
-
qs = line["text"]
|
45 |
-
cur_prompt = qs
|
46 |
-
if model.config.mm_use_im_start_end:
|
47 |
-
qs = DEFAULT_IM_START_TOKEN + DEFAULT_IMAGE_TOKEN + DEFAULT_IM_END_TOKEN + '\n' + qs
|
48 |
-
else:
|
49 |
-
qs = DEFAULT_IMAGE_TOKEN + '\n' + qs
|
50 |
-
|
51 |
-
conv = conv_templates[args.conv_mode].copy()
|
52 |
-
conv.append_message(conv.roles[0], qs)
|
53 |
-
conv.append_message(conv.roles[1], None)
|
54 |
-
prompt = conv.get_prompt()
|
55 |
-
|
56 |
-
input_ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors='pt').unsqueeze(0).cuda()
|
57 |
-
|
58 |
-
image = Image.open(os.path.join(args.image_folder, image_file))
|
59 |
-
image_tensor = image_processor.preprocess(image, return_tensors='pt')['pixel_values'][0]
|
60 |
-
|
61 |
-
stop_str = conv.sep if conv.sep_style != SeparatorStyle.TWO else conv.sep2
|
62 |
-
keywords = [stop_str]
|
63 |
-
stopping_criteria = KeywordsStoppingCriteria(keywords, tokenizer, input_ids)
|
64 |
-
|
65 |
-
with torch.inference_mode():
|
66 |
-
output_ids = model.generate(
|
67 |
-
input_ids,
|
68 |
-
images=image_tensor.unsqueeze(0).half().cuda(),
|
69 |
-
do_sample=True if args.temperature > 0 else False,
|
70 |
-
temperature=args.temperature,
|
71 |
-
top_p=args.top_p,
|
72 |
-
num_beams=args.num_beams,
|
73 |
-
# no_repeat_ngram_size=3,
|
74 |
-
max_new_tokens=1024,
|
75 |
-
use_cache=True)
|
76 |
-
|
77 |
-
input_token_len = input_ids.shape[1]
|
78 |
-
n_diff_input_output = (input_ids != output_ids[:, :input_token_len]).sum().item()
|
79 |
-
if n_diff_input_output > 0:
|
80 |
-
print(f'[Warning] {n_diff_input_output} output_ids are not the same as the input_ids')
|
81 |
-
outputs = tokenizer.batch_decode(output_ids[:, input_token_len:], skip_special_tokens=True)[0]
|
82 |
-
outputs = outputs.strip()
|
83 |
-
if outputs.endswith(stop_str):
|
84 |
-
outputs = outputs[:-len(stop_str)]
|
85 |
-
outputs = outputs.strip()
|
86 |
-
|
87 |
-
ans_id = shortuuid.uuid()
|
88 |
-
ans_file.write(json.dumps({"question_id": idx,
|
89 |
-
"prompt": cur_prompt,
|
90 |
-
"text": outputs,
|
91 |
-
"answer_id": ans_id,
|
92 |
-
"model_id": model_name,
|
93 |
-
"metadata": {}}) + "\n")
|
94 |
-
ans_file.flush()
|
95 |
-
ans_file.close()
|
96 |
-
|
97 |
-
if __name__ == "__main__":
|
98 |
-
parser = argparse.ArgumentParser()
|
99 |
-
parser.add_argument("--model-path", type=str, default="facebook/opt-350m")
|
100 |
-
parser.add_argument("--model-base", type=str, default=None)
|
101 |
-
parser.add_argument("--image-folder", type=str, default="")
|
102 |
-
parser.add_argument("--question-file", type=str, default="tables/question.jsonl")
|
103 |
-
parser.add_argument("--answers-file", type=str, default="answer.jsonl")
|
104 |
-
parser.add_argument("--conv-mode", type=str, default="llava_v1")
|
105 |
-
parser.add_argument("--num-chunks", type=int, default=1)
|
106 |
-
parser.add_argument("--chunk-idx", type=int, default=0)
|
107 |
-
parser.add_argument("--temperature", type=float, default=0.2)
|
108 |
-
parser.add_argument("--top_p", type=float, default=None)
|
109 |
-
parser.add_argument("--num_beams", type=int, default=1)
|
110 |
-
args = parser.parse_args()
|
111 |
-
|
112 |
-
eval_model(args)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
LLAVA_Biovil/llava/eval/model_vqa_loader.py
DELETED
@@ -1,141 +0,0 @@
|
|
1 |
-
import argparse
|
2 |
-
import torch
|
3 |
-
import os
|
4 |
-
import json
|
5 |
-
from tqdm import tqdm
|
6 |
-
import shortuuid
|
7 |
-
|
8 |
-
from LLAV.llava.constants import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN, DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN
|
9 |
-
from LLAV.llava.conversation import conv_templates
|
10 |
-
from LLAV.llava.model.builder import load_pretrained_model
|
11 |
-
from LLAV.llava.utils import disable_torch_init
|
12 |
-
from LLAV.llava.mm_utils import tokenizer_image_token, process_images, get_model_name_from_path
|
13 |
-
from torch.utils.data import Dataset, DataLoader
|
14 |
-
|
15 |
-
from PIL import Image
|
16 |
-
import math
|
17 |
-
|
18 |
-
|
19 |
-
def split_list(lst, n):
|
20 |
-
"""Split a list into n (roughly) equal-sized chunks"""
|
21 |
-
chunk_size = math.ceil(len(lst) / n) # integer division
|
22 |
-
return [lst[i:i+chunk_size] for i in range(0, len(lst), chunk_size)]
|
23 |
-
|
24 |
-
|
25 |
-
def get_chunk(lst, n, k):
|
26 |
-
chunks = split_list(lst, n)
|
27 |
-
return chunks[k]
|
28 |
-
|
29 |
-
|
30 |
-
# Custom dataset class
|
31 |
-
class CustomDataset(Dataset):
|
32 |
-
def __init__(self, questions, image_folder, tokenizer, image_processor, model_config):
|
33 |
-
self.questions = questions
|
34 |
-
self.image_folder = image_folder
|
35 |
-
self.tokenizer = tokenizer
|
36 |
-
self.image_processor = image_processor
|
37 |
-
self.model_config = model_config
|
38 |
-
|
39 |
-
def __getitem__(self, index):
|
40 |
-
line = self.questions[index]
|
41 |
-
image_file = line["image"]
|
42 |
-
qs = line["text"]
|
43 |
-
if self.model_config.mm_use_im_start_end:
|
44 |
-
qs = DEFAULT_IM_START_TOKEN + DEFAULT_IMAGE_TOKEN + DEFAULT_IM_END_TOKEN + '\n' + qs
|
45 |
-
else:
|
46 |
-
qs = DEFAULT_IMAGE_TOKEN + '\n' + qs
|
47 |
-
|
48 |
-
conv = conv_templates[args.conv_mode].copy()
|
49 |
-
conv.append_message(conv.roles[0], qs)
|
50 |
-
conv.append_message(conv.roles[1], None)
|
51 |
-
prompt = conv.get_prompt()
|
52 |
-
|
53 |
-
image = Image.open(os.path.join(self.image_folder, image_file)).convert('RGB')
|
54 |
-
image_tensor = process_images([image], self.image_processor, self.model_config)[0]
|
55 |
-
|
56 |
-
input_ids = tokenizer_image_token(prompt, self.tokenizer, IMAGE_TOKEN_INDEX, return_tensors='pt')
|
57 |
-
|
58 |
-
return input_ids, image_tensor
|
59 |
-
|
60 |
-
def __len__(self):
|
61 |
-
return len(self.questions)
|
62 |
-
|
63 |
-
|
64 |
-
# DataLoader
|
65 |
-
def create_data_loader(questions, image_folder, tokenizer, image_processor, model_config, batch_size=1, num_workers=4):
|
66 |
-
assert batch_size == 1, "batch_size must be 1"
|
67 |
-
dataset = CustomDataset(questions, image_folder, tokenizer, image_processor, model_config)
|
68 |
-
data_loader = DataLoader(dataset, batch_size=batch_size, num_workers=num_workers, shuffle=False)
|
69 |
-
return data_loader
|
70 |
-
|
71 |
-
|
72 |
-
def eval_model(args):
|
73 |
-
# Model
|
74 |
-
disable_torch_init()
|
75 |
-
model_path = os.path.expanduser(args.model_path)
|
76 |
-
model_name = get_model_name_from_path(model_path)
|
77 |
-
tokenizer, model, image_processor, context_len = load_pretrained_model(model_path, args.model_base, model_name)
|
78 |
-
|
79 |
-
questions = [json.loads(q) for q in open(os.path.expanduser(args.question_file), "r")]
|
80 |
-
questions = get_chunk(questions, args.num_chunks, args.chunk_idx)
|
81 |
-
answers_file = os.path.expanduser(args.answers_file)
|
82 |
-
os.makedirs(os.path.dirname(answers_file), exist_ok=True)
|
83 |
-
ans_file = open(answers_file, "w")
|
84 |
-
|
85 |
-
if 'plain' in model_name and 'finetune' not in model_name.lower() and 'mmtag' not in args.conv_mode:
|
86 |
-
args.conv_mode = args.conv_mode + '_mmtag'
|
87 |
-
print(f'It seems that this is a plain model, but it is not using a mmtag prompt, auto switching to {args.conv_mode}.')
|
88 |
-
|
89 |
-
data_loader = create_data_loader(questions, args.image_folder, tokenizer, image_processor, model.config)
|
90 |
-
|
91 |
-
for (input_ids, image_tensor), line in tqdm(zip(data_loader, questions), total=len(questions)):
|
92 |
-
idx = line["question_id"]
|
93 |
-
cur_prompt = line["text"]
|
94 |
-
|
95 |
-
input_ids = input_ids.to(device='cuda', non_blocking=True)
|
96 |
-
|
97 |
-
with torch.inference_mode():
|
98 |
-
output_ids = model.generate(
|
99 |
-
input_ids,
|
100 |
-
images=image_tensor.to(dtype=torch.float16, device='cuda', non_blocking=True),
|
101 |
-
do_sample=True if args.temperature > 0 else False,
|
102 |
-
temperature=args.temperature,
|
103 |
-
top_p=args.top_p,
|
104 |
-
num_beams=args.num_beams,
|
105 |
-
max_new_tokens=args.max_new_tokens,
|
106 |
-
use_cache=True)
|
107 |
-
|
108 |
-
input_token_len = input_ids.shape[1]
|
109 |
-
n_diff_input_output = (input_ids != output_ids[:, :input_token_len]).sum().item()
|
110 |
-
if n_diff_input_output > 0:
|
111 |
-
print(f'[Warning] {n_diff_input_output} output_ids are not the same as the input_ids')
|
112 |
-
outputs = tokenizer.batch_decode(output_ids[:, input_token_len:], skip_special_tokens=True)[0]
|
113 |
-
outputs = outputs.strip()
|
114 |
-
|
115 |
-
ans_id = shortuuid.uuid()
|
116 |
-
ans_file.write(json.dumps({"question_id": idx,
|
117 |
-
"prompt": cur_prompt,
|
118 |
-
"text": outputs,
|
119 |
-
"answer_id": ans_id,
|
120 |
-
"model_id": model_name,
|
121 |
-
"metadata": {}}) + "\n")
|
122 |
-
# ans_file.flush()
|
123 |
-
ans_file.close()
|
124 |
-
|
125 |
-
if __name__ == "__main__":
|
126 |
-
parser = argparse.ArgumentParser()
|
127 |
-
parser.add_argument("--model-path", type=str, default="facebook/opt-350m")
|
128 |
-
parser.add_argument("--model-base", type=str, default=None)
|
129 |
-
parser.add_argument("--image-folder", type=str, default="")
|
130 |
-
parser.add_argument("--question-file", type=str, default="tables/question.jsonl")
|
131 |
-
parser.add_argument("--answers-file", type=str, default="answer.jsonl")
|
132 |
-
parser.add_argument("--conv-mode", type=str, default="llava_v1")
|
133 |
-
parser.add_argument("--num-chunks", type=int, default=1)
|
134 |
-
parser.add_argument("--chunk-idx", type=int, default=0)
|
135 |
-
parser.add_argument("--temperature", type=float, default=0.2)
|
136 |
-
parser.add_argument("--top_p", type=float, default=None)
|
137 |
-
parser.add_argument("--num_beams", type=int, default=1)
|
138 |
-
parser.add_argument("--max_new_tokens", type=int, default=128)
|
139 |
-
args = parser.parse_args()
|
140 |
-
|
141 |
-
eval_model(args)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
LLAVA_Biovil/llava/eval/model_vqa_mmbench.py
DELETED
@@ -1,169 +0,0 @@
|
|
1 |
-
import argparse
|
2 |
-
import torch
|
3 |
-
import os
|
4 |
-
import json
|
5 |
-
import pandas as pd
|
6 |
-
from tqdm import tqdm
|
7 |
-
import shortuuid
|
8 |
-
|
9 |
-
from LLAV.llava.constants import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN, DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN
|
10 |
-
from LLAV.llava.conversation import conv_templates, SeparatorStyle
|
11 |
-
from LLAV.llava.model.builder import load_pretrained_model
|
12 |
-
from LLAV.llava.utils import disable_torch_init
|
13 |
-
from LLAV.llava.mm_utils import tokenizer_image_token, process_images, load_image_from_base64, get_model_name_from_path
|
14 |
-
|
15 |
-
import math
|
16 |
-
|
17 |
-
|
18 |
-
all_options = ['A', 'B', 'C', 'D']
|
19 |
-
|
20 |
-
|
21 |
-
def split_list(lst, n):
|
22 |
-
"""Split a list into n (roughly) equal-sized chunks"""
|
23 |
-
chunk_size = math.ceil(len(lst) / n) # integer division
|
24 |
-
return [lst[i:i+chunk_size] for i in range(0, len(lst), chunk_size)]
|
25 |
-
|
26 |
-
|
27 |
-
def get_chunk(lst, n, k):
|
28 |
-
chunks = split_list(lst, n)
|
29 |
-
return chunks[k]
|
30 |
-
|
31 |
-
|
32 |
-
def is_none(value):
|
33 |
-
if value is None:
|
34 |
-
return True
|
35 |
-
if type(value) is float and math.isnan(value):
|
36 |
-
return True
|
37 |
-
if type(value) is str and value.lower() == 'nan':
|
38 |
-
return True
|
39 |
-
if type(value) is str and value.lower() == 'none':
|
40 |
-
return True
|
41 |
-
return False
|
42 |
-
|
43 |
-
def get_options(row, options):
|
44 |
-
parsed_options = []
|
45 |
-
for option in options:
|
46 |
-
option_value = row[option]
|
47 |
-
if is_none(option_value):
|
48 |
-
break
|
49 |
-
parsed_options.append(option_value)
|
50 |
-
return parsed_options
|
51 |
-
|
52 |
-
|
53 |
-
def eval_model(args):
|
54 |
-
# Model
|
55 |
-
disable_torch_init()
|
56 |
-
model_path = os.path.expanduser(args.model_path)
|
57 |
-
model_name = get_model_name_from_path(model_path)
|
58 |
-
tokenizer, model, image_processor, context_len = load_pretrained_model(model_path, args.model_base, model_name)
|
59 |
-
|
60 |
-
questions = pd.read_table(os.path.expanduser(args.question_file))
|
61 |
-
questions = get_chunk(questions, args.num_chunks, args.chunk_idx)
|
62 |
-
answers_file = os.path.expanduser(args.answers_file)
|
63 |
-
os.makedirs(os.path.dirname(answers_file), exist_ok=True)
|
64 |
-
ans_file = open(answers_file, "w")
|
65 |
-
|
66 |
-
if 'plain' in model_name and 'finetune' not in model_name.lower() and 'mmtag' not in args.conv_mode:
|
67 |
-
args.conv_mode = args.conv_mode + '_mmtag'
|
68 |
-
print(f'It seems that this is a plain model, but it is not using a mmtag prompt, auto switching to {args.conv_mode}.')
|
69 |
-
|
70 |
-
for index, row in tqdm(questions.iterrows(), total=len(questions)):
|
71 |
-
options = get_options(row, all_options)
|
72 |
-
cur_option_char = all_options[:len(options)]
|
73 |
-
|
74 |
-
if args.all_rounds:
|
75 |
-
num_rounds = len(options)
|
76 |
-
else:
|
77 |
-
num_rounds = 1
|
78 |
-
|
79 |
-
for round_idx in range(num_rounds):
|
80 |
-
idx = row['index']
|
81 |
-
question = row['question']
|
82 |
-
hint = row['hint']
|
83 |
-
image = load_image_from_base64(row['image'])
|
84 |
-
if not is_none(hint):
|
85 |
-
question = hint + '\n' + question
|
86 |
-
for option_char, option in zip(all_options[:len(options)], options):
|
87 |
-
question = question + '\n' + option_char + '. ' + option
|
88 |
-
qs = cur_prompt = question
|
89 |
-
if model.config.mm_use_im_start_end:
|
90 |
-
qs = DEFAULT_IM_START_TOKEN + DEFAULT_IMAGE_TOKEN + DEFAULT_IM_END_TOKEN + '\n' + qs
|
91 |
-
else:
|
92 |
-
qs = DEFAULT_IMAGE_TOKEN + '\n' + qs
|
93 |
-
|
94 |
-
if args.single_pred_prompt:
|
95 |
-
if args.lang == 'cn':
|
96 |
-
qs = qs + '\n' + "请直接回答选项字母。"
|
97 |
-
else:
|
98 |
-
qs = qs + '\n' + "Answer with the option's letter from the given choices directly."
|
99 |
-
|
100 |
-
conv = conv_templates[args.conv_mode].copy()
|
101 |
-
conv.append_message(conv.roles[0], qs)
|
102 |
-
conv.append_message(conv.roles[1], None)
|
103 |
-
prompt = conv.get_prompt()
|
104 |
-
|
105 |
-
input_ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors='pt').unsqueeze(0).cuda()
|
106 |
-
|
107 |
-
image_tensor = process_images([image], image_processor, model.config)[0]
|
108 |
-
# image_tensor = image_processor.preprocess(image, return_tensors='pt')['pixel_values'][0]
|
109 |
-
|
110 |
-
stop_str = conv.sep if conv.sep_style != SeparatorStyle.TWO else conv.sep2
|
111 |
-
|
112 |
-
with torch.inference_mode():
|
113 |
-
output_ids = model.generate(
|
114 |
-
input_ids,
|
115 |
-
images=image_tensor.unsqueeze(0).half().cuda(),
|
116 |
-
do_sample=True if args.temperature > 0 else False,
|
117 |
-
temperature=args.temperature,
|
118 |
-
top_p=args.top_p,
|
119 |
-
num_beams=args.num_beams,
|
120 |
-
# no_repeat_ngram_size=3,
|
121 |
-
max_new_tokens=1024,
|
122 |
-
use_cache=True)
|
123 |
-
|
124 |
-
input_token_len = input_ids.shape[1]
|
125 |
-
n_diff_input_output = (input_ids != output_ids[:, :input_token_len]).sum().item()
|
126 |
-
if n_diff_input_output > 0:
|
127 |
-
print(f'[Warning] {n_diff_input_output} output_ids are not the same as the input_ids')
|
128 |
-
outputs = tokenizer.batch_decode(output_ids[:, input_token_len:], skip_special_tokens=True)[0]
|
129 |
-
outputs = outputs.strip()
|
130 |
-
if outputs.endswith(stop_str):
|
131 |
-
outputs = outputs[:-len(stop_str)]
|
132 |
-
outputs = outputs.strip()
|
133 |
-
|
134 |
-
ans_id = shortuuid.uuid()
|
135 |
-
ans_file.write(json.dumps({"question_id": idx,
|
136 |
-
"round_id": round_idx,
|
137 |
-
"prompt": cur_prompt,
|
138 |
-
"text": outputs,
|
139 |
-
"options": options,
|
140 |
-
"option_char": cur_option_char,
|
141 |
-
"answer_id": ans_id,
|
142 |
-
"model_id": model_name,
|
143 |
-
"metadata": {}}) + "\n")
|
144 |
-
ans_file.flush()
|
145 |
-
|
146 |
-
# rotate options
|
147 |
-
options = options[1:] + options[:1]
|
148 |
-
cur_option_char = cur_option_char[1:] + cur_option_char[:1]
|
149 |
-
ans_file.close()
|
150 |
-
|
151 |
-
if __name__ == "__main__":
|
152 |
-
parser = argparse.ArgumentParser()
|
153 |
-
parser.add_argument("--model-path", type=str, default="facebook/opt-350m")
|
154 |
-
parser.add_argument("--model-base", type=str, default=None)
|
155 |
-
parser.add_argument("--image-folder", type=str, default="")
|
156 |
-
parser.add_argument("--question-file", type=str, default="tables/question.jsonl")
|
157 |
-
parser.add_argument("--answers-file", type=str, default="answer.jsonl")
|
158 |
-
parser.add_argument("--conv-mode", type=str, default="llava_v1")
|
159 |
-
parser.add_argument("--num-chunks", type=int, default=1)
|
160 |
-
parser.add_argument("--chunk-idx", type=int, default=0)
|
161 |
-
parser.add_argument("--temperature", type=float, default=0.2)
|
162 |
-
parser.add_argument("--top_p", type=float, default=None)
|
163 |
-
parser.add_argument("--num_beams", type=int, default=1)
|
164 |
-
parser.add_argument("--all-rounds", action="store_true")
|
165 |
-
parser.add_argument("--single-pred-prompt", action="store_true")
|
166 |
-
parser.add_argument("--lang", type=str, default="en")
|
167 |
-
args = parser.parse_args()
|
168 |
-
|
169 |
-
eval_model(args)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
LLAVA_Biovil/llava/eval/model_vqa_qbench.py
DELETED
@@ -1,120 +0,0 @@
|
|
1 |
-
import argparse
|
2 |
-
import torch
|
3 |
-
from tqdm import tqdm
|
4 |
-
import json
|
5 |
-
|
6 |
-
from LLAV.llava.constants import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN, DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN
|
7 |
-
from LLAV.llava.conversation import conv_templates, SeparatorStyle
|
8 |
-
from LLAV.llava.model.builder import load_pretrained_model
|
9 |
-
from LLAV.llava.utils import disable_torch_init
|
10 |
-
from LLAV.llava.mm_utils import tokenizer_image_token, get_model_name_from_path, KeywordsStoppingCriteria
|
11 |
-
|
12 |
-
import requests
|
13 |
-
from PIL import Image
|
14 |
-
from io import BytesIO
|
15 |
-
|
16 |
-
|
17 |
-
def load_image(image_file):
|
18 |
-
if image_file.startswith('http') or image_file.startswith('https'):
|
19 |
-
response = requests.get(image_file)
|
20 |
-
image = Image.open(BytesIO(response.content)).convert('RGB')
|
21 |
-
else:
|
22 |
-
image = Image.open(image_file).convert('RGB')
|
23 |
-
return image
|
24 |
-
|
25 |
-
|
26 |
-
def eval_model(args):
|
27 |
-
# Model
|
28 |
-
disable_torch_init()
|
29 |
-
|
30 |
-
model_name = get_model_name_from_path(args.model_path)
|
31 |
-
tokenizer, model, image_processor, context_len = load_pretrained_model(args.model_path, args.model_base, model_name, True)
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
with open(args.questions_file) as f:
|
37 |
-
llvqa_data = json.load(f)
|
38 |
-
|
39 |
-
for i, llddata in enumerate(tqdm(llvqa_data)):
|
40 |
-
filename = llddata["img_path"]
|
41 |
-
if args.lang == "en":
|
42 |
-
message = llddata["question"] + "\nChoose between one of the options as follows:\n"
|
43 |
-
elif args.lang == "zh":
|
44 |
-
message = llddata["question"] + "\在下列选项中选择一个:\n"
|
45 |
-
else:
|
46 |
-
raise NotImplementedError("Q-Bench does not support languages other than English (en) and Chinese (zh) yet. Contact us (https://github.com/VQAssessment/Q-Bench/) to convert Q-Bench into more languages.")
|
47 |
-
for choice, ans in zip(["A.", "B.", "C.", "D."], llddata["candidates"]):
|
48 |
-
message += f"{choice} {ans}\n"
|
49 |
-
qs = message
|
50 |
-
|
51 |
-
if model.config.mm_use_im_start_end:
|
52 |
-
qs = DEFAULT_IM_START_TOKEN + DEFAULT_IMAGE_TOKEN + DEFAULT_IM_END_TOKEN + '\n' + qs
|
53 |
-
else:
|
54 |
-
qs = DEFAULT_IMAGE_TOKEN + '\n' + qs
|
55 |
-
|
56 |
-
if 'llama-2' in model_name.lower():
|
57 |
-
conv_mode = "llava_llama_2"
|
58 |
-
elif "v1" in model_name.lower():
|
59 |
-
conv_mode = "llava_v1"
|
60 |
-
elif "mpt" in model_name.lower():
|
61 |
-
conv_mode = "mpt"
|
62 |
-
else:
|
63 |
-
conv_mode = "llava_v0"
|
64 |
-
|
65 |
-
if args.conv_mode is not None and conv_mode != args.conv_mode:
|
66 |
-
print('[WARNING] the auto inferred conversation mode is {}, while `--conv-mode` is {}, using {}'.format(conv_mode, args.conv_mode, args.conv_mode))
|
67 |
-
else:
|
68 |
-
args.conv_mode = conv_mode
|
69 |
-
|
70 |
-
conv = conv_templates[args.conv_mode].copy()
|
71 |
-
conv.append_message(conv.roles[0], qs)
|
72 |
-
conv.append_message(conv.roles[1], None)
|
73 |
-
prompt = conv.get_prompt()
|
74 |
-
|
75 |
-
image = load_image(args.image_folder + filename)
|
76 |
-
image_tensor = image_processor.preprocess(image, return_tensors='pt')['pixel_values'].half().cuda()
|
77 |
-
|
78 |
-
input_ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors='pt').unsqueeze(0).cuda()
|
79 |
-
|
80 |
-
stop_str = conv.sep if conv.sep_style != SeparatorStyle.TWO else conv.sep2
|
81 |
-
keywords = [stop_str]
|
82 |
-
stopping_criteria = KeywordsStoppingCriteria(keywords, tokenizer, input_ids)
|
83 |
-
|
84 |
-
|
85 |
-
with torch.inference_mode():
|
86 |
-
output_ids = model.generate(
|
87 |
-
input_ids,
|
88 |
-
images=image_tensor,
|
89 |
-
num_beams=1,
|
90 |
-
do_sample=False,
|
91 |
-
temperature=0,
|
92 |
-
max_new_tokens=1024,
|
93 |
-
use_cache=True,
|
94 |
-
stopping_criteria=[stopping_criteria])
|
95 |
-
|
96 |
-
input_token_len = input_ids.shape[1]
|
97 |
-
n_diff_input_output = (input_ids != output_ids[:, :input_token_len]).sum().item()
|
98 |
-
if n_diff_input_output > 0:
|
99 |
-
print(f'[Warning] {n_diff_input_output} output_ids are not the same as the input_ids')
|
100 |
-
outputs = tokenizer.batch_decode(output_ids[:, input_token_len:], skip_special_tokens=True)[0]
|
101 |
-
outputs = outputs.strip()
|
102 |
-
if outputs.endswith(stop_str):
|
103 |
-
outputs = outputs[:-len(stop_str)]
|
104 |
-
outputs = outputs.strip()
|
105 |
-
llddata["response"] = outputs
|
106 |
-
with open(args.answers_file, "a") as wf:
|
107 |
-
json.dump(llddata, wf)
|
108 |
-
|
109 |
-
if __name__ == "__main__":
|
110 |
-
parser = argparse.ArgumentParser()
|
111 |
-
parser.add_argument("--model-path", type=str, default="llava-v1.5")
|
112 |
-
parser.add_argument("--model-base", type=str, default=None)
|
113 |
-
parser.add_argument("--image-folder", type=str, default="./playground/data/qbench/images_llvisionqa")
|
114 |
-
parser.add_argument("--questions-file", type=str, default="./playground/data/qbench/llvisionqa_dev.json")
|
115 |
-
parser.add_argument("--answers-file", type=str, default="answer.jsonl")
|
116 |
-
parser.add_argument("--conv-mode", type=str, default="llava_v1")
|
117 |
-
parser.add_argument("--lang", type=str, default="en")
|
118 |
-
args = parser.parse_args()
|
119 |
-
|
120 |
-
eval_model(args)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
LLAVA_Biovil/llava/eval/model_vqa_science.py
DELETED
@@ -1,147 +0,0 @@
|
|
1 |
-
import argparse
|
2 |
-
import torch
|
3 |
-
import os
|
4 |
-
import json
|
5 |
-
from tqdm import tqdm
|
6 |
-
import shortuuid
|
7 |
-
|
8 |
-
from LLAV.llava.constants import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN, DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN
|
9 |
-
from LLAV.llava.conversation import conv_templates, SeparatorStyle
|
10 |
-
from LLAV.llava.model.builder import load_pretrained_model
|
11 |
-
from LLAV.llava.utils import disable_torch_init
|
12 |
-
from LLAV.llava.mm_utils import tokenizer_image_token, get_model_name_from_path, KeywordsStoppingCriteria
|
13 |
-
|
14 |
-
from PIL import Image
|
15 |
-
import math
|
16 |
-
|
17 |
-
|
18 |
-
def split_list(lst, n):
|
19 |
-
"""Split a list into n (roughly) equal-sized chunks"""
|
20 |
-
chunk_size = math.ceil(len(lst) / n) # integer division
|
21 |
-
return [lst[i:i+chunk_size] for i in range(0, len(lst), chunk_size)]
|
22 |
-
|
23 |
-
|
24 |
-
def get_chunk(lst, n, k):
|
25 |
-
chunks = split_list(lst, n)
|
26 |
-
return chunks[k]
|
27 |
-
|
28 |
-
|
29 |
-
def eval_model(args):
|
30 |
-
# Model
|
31 |
-
disable_torch_init()
|
32 |
-
model_path = os.path.expanduser(args.model_path)
|
33 |
-
model_name = get_model_name_from_path(model_path)
|
34 |
-
tokenizer, model, image_processor, context_len = load_pretrained_model(model_path, args.model_base, model_name)
|
35 |
-
|
36 |
-
questions = json.load(open(os.path.expanduser(args.question_file), "r"))
|
37 |
-
questions = get_chunk(questions, args.num_chunks, args.chunk_idx)
|
38 |
-
answers_file = os.path.expanduser(args.answers_file)
|
39 |
-
os.makedirs(os.path.dirname(answers_file), exist_ok=True)
|
40 |
-
ans_file = open(answers_file, "w")
|
41 |
-
for i, line in enumerate(tqdm(questions)):
|
42 |
-
idx = line["id"]
|
43 |
-
question = line['conversations'][0]
|
44 |
-
qs = question['value'].replace('<image>', '').strip()
|
45 |
-
cur_prompt = qs
|
46 |
-
|
47 |
-
if 'image' in line:
|
48 |
-
image_file = line["image"]
|
49 |
-
image = Image.open(os.path.join(args.image_folder, image_file))
|
50 |
-
image_tensor = image_processor.preprocess(image, return_tensors='pt')['pixel_values'][0]
|
51 |
-
images = image_tensor.unsqueeze(0).half().cuda()
|
52 |
-
if getattr(model.config, 'mm_use_im_start_end', False):
|
53 |
-
qs = DEFAULT_IM_START_TOKEN + DEFAULT_IMAGE_TOKEN + DEFAULT_IM_END_TOKEN + '\n' + qs
|
54 |
-
else:
|
55 |
-
qs = DEFAULT_IMAGE_TOKEN + '\n' + qs
|
56 |
-
cur_prompt = '<image>' + '\n' + cur_prompt
|
57 |
-
else:
|
58 |
-
images = None
|
59 |
-
|
60 |
-
if args.single_pred_prompt:
|
61 |
-
qs = qs + '\n' + "Answer with the option's letter from the given choices directly."
|
62 |
-
cur_prompt = cur_prompt + '\n' + "Answer with the option's letter from the given choices directly."
|
63 |
-
|
64 |
-
conv = conv_templates[args.conv_mode].copy()
|
65 |
-
conv.append_message(conv.roles[0], qs)
|
66 |
-
conv.append_message(conv.roles[1], None)
|
67 |
-
prompt = conv.get_prompt()
|
68 |
-
|
69 |
-
input_ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors='pt').unsqueeze(0).cuda()
|
70 |
-
|
71 |
-
stop_str = conv.sep if conv.sep_style != SeparatorStyle.TWO else conv.sep2
|
72 |
-
keywords = [stop_str]
|
73 |
-
stopping_criteria = [KeywordsStoppingCriteria(keywords, tokenizer, input_ids)] if conv.version == "v0" else None
|
74 |
-
|
75 |
-
with torch.inference_mode():
|
76 |
-
output_ids = model.generate(
|
77 |
-
input_ids,
|
78 |
-
images=images,
|
79 |
-
do_sample=True if args.temperature > 0 else False,
|
80 |
-
temperature=args.temperature,
|
81 |
-
max_new_tokens=1024,
|
82 |
-
use_cache=True,
|
83 |
-
stopping_criteria=stopping_criteria,
|
84 |
-
)
|
85 |
-
|
86 |
-
input_token_len = input_ids.shape[1]
|
87 |
-
n_diff_input_output = (input_ids != output_ids[:, :input_token_len]).sum().item()
|
88 |
-
if n_diff_input_output > 0:
|
89 |
-
print(f'[Warning] {n_diff_input_output} output_ids are not the same as the input_ids')
|
90 |
-
outputs = tokenizer.batch_decode(output_ids[:, input_token_len:], skip_special_tokens=True)[0]
|
91 |
-
outputs = outputs.strip()
|
92 |
-
if outputs.endswith(stop_str):
|
93 |
-
outputs = outputs[:-len(stop_str)]
|
94 |
-
outputs = outputs.strip()
|
95 |
-
|
96 |
-
# prompt for answer
|
97 |
-
if args.answer_prompter:
|
98 |
-
outputs_reasoning = outputs
|
99 |
-
input_ids = tokenizer_image_token(prompt + outputs_reasoning + ' ###\nANSWER:', tokenizer, IMAGE_TOKEN_INDEX, return_tensors='pt').unsqueeze(0).cuda()
|
100 |
-
|
101 |
-
with torch.inference_mode():
|
102 |
-
output_ids = model.generate(
|
103 |
-
input_ids,
|
104 |
-
images=images,
|
105 |
-
do_sample=True if args.temperature > 0 else False,
|
106 |
-
temperature=args.temperature,
|
107 |
-
max_new_tokens=64,
|
108 |
-
use_cache=True,
|
109 |
-
stopping_criteria=[stopping_criteria])
|
110 |
-
|
111 |
-
input_token_len = input_ids.shape[1]
|
112 |
-
n_diff_input_output = (input_ids != output_ids[:, :input_token_len]).sum().item()
|
113 |
-
if n_diff_input_output > 0:
|
114 |
-
print(f'[Warning] {n_diff_input_output} output_ids are not the same as the input_ids')
|
115 |
-
outputs = tokenizer.batch_decode(output_ids[:, input_token_len:], skip_special_tokens=True)[0]
|
116 |
-
outputs = outputs.strip()
|
117 |
-
if outputs.endswith(stop_str):
|
118 |
-
outputs = outputs[:-len(stop_str)]
|
119 |
-
outputs = outputs.strip()
|
120 |
-
outputs = outputs_reasoning + '\n The answer is ' + outputs
|
121 |
-
|
122 |
-
ans_id = shortuuid.uuid()
|
123 |
-
ans_file.write(json.dumps({"question_id": idx,
|
124 |
-
"prompt": cur_prompt,
|
125 |
-
"text": outputs,
|
126 |
-
"answer_id": ans_id,
|
127 |
-
"model_id": model_name,
|
128 |
-
"metadata": {}}) + "\n")
|
129 |
-
ans_file.flush()
|
130 |
-
ans_file.close()
|
131 |
-
|
132 |
-
if __name__ == "__main__":
|
133 |
-
parser = argparse.ArgumentParser()
|
134 |
-
parser.add_argument("--model-path", type=str, default="facebook/opt-350m")
|
135 |
-
parser.add_argument("--model-base", type=str, default=None)
|
136 |
-
parser.add_argument("--image-folder", type=str, default="")
|
137 |
-
parser.add_argument("--question-file", type=str, default="tables/question.json")
|
138 |
-
parser.add_argument("--answers-file", type=str, default="answer.jsonl")
|
139 |
-
parser.add_argument("--conv-mode", type=str, default="llava_v0")
|
140 |
-
parser.add_argument("--num-chunks", type=int, default=1)
|
141 |
-
parser.add_argument("--chunk-idx", type=int, default=0)
|
142 |
-
parser.add_argument("--temperature", type=float, default=0.2)
|
143 |
-
parser.add_argument("--answer-prompter", action="store_true")
|
144 |
-
parser.add_argument("--single-pred-prompt", action="store_true")
|
145 |
-
args = parser.parse_args()
|
146 |
-
|
147 |
-
eval_model(args)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
LLAVA_Biovil/llava/eval/qa_baseline_gpt35.py
DELETED
@@ -1,74 +0,0 @@
|
|
1 |
-
"""Generate answers with GPT-3.5"""
|
2 |
-
# Note: you need to be using OpenAI Python v0.27.0 for the code below to work
|
3 |
-
import argparse
|
4 |
-
import json
|
5 |
-
import os
|
6 |
-
import time
|
7 |
-
import concurrent.futures
|
8 |
-
|
9 |
-
import openai
|
10 |
-
import tqdm
|
11 |
-
import shortuuid
|
12 |
-
|
13 |
-
MODEL = 'gpt-3.5-turbo'
|
14 |
-
MODEL_ID = 'gpt-3.5-turbo:20230327'
|
15 |
-
|
16 |
-
def get_answer(question_id: int, question: str, max_tokens: int):
|
17 |
-
ans = {
|
18 |
-
'answer_id': shortuuid.uuid(),
|
19 |
-
'question_id': question_id,
|
20 |
-
'model_id': MODEL_ID,
|
21 |
-
}
|
22 |
-
for _ in range(3):
|
23 |
-
try:
|
24 |
-
response = openai.ChatCompletion.create(
|
25 |
-
model=MODEL,
|
26 |
-
messages=[{
|
27 |
-
'role': 'system',
|
28 |
-
'content': 'You are a helpful assistant.'
|
29 |
-
}, {
|
30 |
-
'role': 'user',
|
31 |
-
'content': question,
|
32 |
-
}],
|
33 |
-
max_tokens=max_tokens,
|
34 |
-
)
|
35 |
-
ans['text'] = response['choices'][0]['message']['content']
|
36 |
-
return ans
|
37 |
-
except Exception as e:
|
38 |
-
print('[ERROR]', e)
|
39 |
-
ans['text'] = '#ERROR#'
|
40 |
-
time.sleep(1)
|
41 |
-
return ans
|
42 |
-
|
43 |
-
|
44 |
-
if __name__ == '__main__':
|
45 |
-
parser = argparse.ArgumentParser(description='ChatGPT answer generation.')
|
46 |
-
parser.add_argument('-q', '--question')
|
47 |
-
parser.add_argument('-o', '--output')
|
48 |
-
parser.add_argument('--max-tokens', type=int, default=1024, help='maximum number of tokens produced in the output')
|
49 |
-
args = parser.parse_args()
|
50 |
-
|
51 |
-
questions_dict = {}
|
52 |
-
with open(os.path.expanduser(args.question)) as f:
|
53 |
-
for line in f:
|
54 |
-
if not line:
|
55 |
-
continue
|
56 |
-
q = json.loads(line)
|
57 |
-
questions_dict[q['question_id']] = q['text']
|
58 |
-
|
59 |
-
answers = []
|
60 |
-
|
61 |
-
with concurrent.futures.ThreadPoolExecutor(max_workers=32) as executor:
|
62 |
-
futures = []
|
63 |
-
for qid, question in questions_dict.items():
|
64 |
-
future = executor.submit(get_answer, qid, question, args.max_tokens)
|
65 |
-
futures.append(future)
|
66 |
-
|
67 |
-
for future in tqdm.tqdm(concurrent.futures.as_completed(futures), total=len(futures)):
|
68 |
-
answers.append(future.result())
|
69 |
-
|
70 |
-
answers.sort(key=lambda x: x['question_id'])
|
71 |
-
|
72 |
-
with open(os.path.expanduser(args.output), 'w') as f:
|
73 |
-
table = [json.dumps(ans) for ans in answers]
|
74 |
-
f.write('\n'.join(table))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
LLAVA_Biovil/llava/eval/run_llava.py
DELETED
@@ -1,155 +0,0 @@
|
|
1 |
-
import argparse
|
2 |
-
import torch
|
3 |
-
|
4 |
-
from LLAV.llava.constants import (
|
5 |
-
IMAGE_TOKEN_INDEX,
|
6 |
-
DEFAULT_IMAGE_TOKEN,
|
7 |
-
DEFAULT_IM_START_TOKEN,
|
8 |
-
DEFAULT_IM_END_TOKEN,
|
9 |
-
IMAGE_PLACEHOLDER,
|
10 |
-
)
|
11 |
-
from LLAV.llava.conversation import conv_templates, SeparatorStyle
|
12 |
-
from LLAV.llava.model.builder import load_pretrained_model
|
13 |
-
from LLAV.llava.utils import disable_torch_init
|
14 |
-
from LLAV.llava.mm_utils import (
|
15 |
-
process_images,
|
16 |
-
tokenizer_image_token,
|
17 |
-
get_model_name_from_path,
|
18 |
-
KeywordsStoppingCriteria,
|
19 |
-
)
|
20 |
-
|
21 |
-
import requests
|
22 |
-
from PIL import Image
|
23 |
-
from io import BytesIO
|
24 |
-
import re
|
25 |
-
|
26 |
-
|
27 |
-
def image_parser(args):
|
28 |
-
out = args.image_file.split(args.sep)
|
29 |
-
return out
|
30 |
-
|
31 |
-
|
32 |
-
def load_image(image_file):
|
33 |
-
if image_file.startswith("http") or image_file.startswith("https"):
|
34 |
-
response = requests.get(image_file)
|
35 |
-
image = Image.open(BytesIO(response.content)).convert("RGB")
|
36 |
-
else:
|
37 |
-
image = Image.open(image_file).convert("RGB")
|
38 |
-
return image
|
39 |
-
|
40 |
-
|
41 |
-
def load_images(image_files):
|
42 |
-
out = []
|
43 |
-
for image_file in image_files:
|
44 |
-
image = load_image(image_file)
|
45 |
-
out.append(image)
|
46 |
-
return out
|
47 |
-
|
48 |
-
|
49 |
-
def eval_model(args):
|
50 |
-
# Model
|
51 |
-
disable_torch_init()
|
52 |
-
|
53 |
-
model_name = get_model_name_from_path(args.model_path)
|
54 |
-
tokenizer, model, image_processor, context_len = load_pretrained_model(
|
55 |
-
args.model_path, args.model_base, model_name
|
56 |
-
)
|
57 |
-
|
58 |
-
qs = args.query
|
59 |
-
image_token_se = DEFAULT_IM_START_TOKEN + DEFAULT_IMAGE_TOKEN + DEFAULT_IM_END_TOKEN
|
60 |
-
if IMAGE_PLACEHOLDER in qs:
|
61 |
-
if model.config.mm_use_im_start_end:
|
62 |
-
qs = re.sub(IMAGE_PLACEHOLDER, image_token_se, qs)
|
63 |
-
else:
|
64 |
-
qs = re.sub(IMAGE_PLACEHOLDER, DEFAULT_IMAGE_TOKEN, qs)
|
65 |
-
else:
|
66 |
-
if model.config.mm_use_im_start_end:
|
67 |
-
qs = image_token_se + "\n" + qs
|
68 |
-
else:
|
69 |
-
qs = DEFAULT_IMAGE_TOKEN + "\n" + qs
|
70 |
-
|
71 |
-
if "llama-2" in model_name.lower():
|
72 |
-
conv_mode = "llava_llama_2"
|
73 |
-
elif "v1" in model_name.lower():
|
74 |
-
conv_mode = "llava_v1"
|
75 |
-
elif "mpt" in model_name.lower():
|
76 |
-
conv_mode = "mpt"
|
77 |
-
else:
|
78 |
-
conv_mode = "llava_v0"
|
79 |
-
|
80 |
-
if args.conv_mode is not None and conv_mode != args.conv_mode:
|
81 |
-
print(
|
82 |
-
"[WARNING] the auto inferred conversation mode is {}, while `--conv-mode` is {}, using {}".format(
|
83 |
-
conv_mode, args.conv_mode, args.conv_mode
|
84 |
-
)
|
85 |
-
)
|
86 |
-
else:
|
87 |
-
args.conv_mode = conv_mode
|
88 |
-
|
89 |
-
conv = conv_templates[args.conv_mode].copy()
|
90 |
-
conv.append_message(conv.roles[0], qs)
|
91 |
-
conv.append_message(conv.roles[1], None)
|
92 |
-
prompt = conv.get_prompt()
|
93 |
-
|
94 |
-
image_files = image_parser(args)
|
95 |
-
images = load_images(image_files)
|
96 |
-
images_tensor = process_images(
|
97 |
-
images,
|
98 |
-
image_processor,
|
99 |
-
model.config
|
100 |
-
).to(model.device, dtype=torch.float16)
|
101 |
-
|
102 |
-
input_ids = (
|
103 |
-
tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt")
|
104 |
-
.unsqueeze(0)
|
105 |
-
.cuda()
|
106 |
-
)
|
107 |
-
|
108 |
-
stop_str = conv.sep if conv.sep_style != SeparatorStyle.TWO else conv.sep2
|
109 |
-
keywords = [stop_str]
|
110 |
-
stopping_criteria = KeywordsStoppingCriteria(keywords, tokenizer, input_ids)
|
111 |
-
|
112 |
-
with torch.inference_mode():
|
113 |
-
output_ids = model.generate(
|
114 |
-
input_ids,
|
115 |
-
images=images_tensor,
|
116 |
-
do_sample=True if args.temperature > 0 else False,
|
117 |
-
temperature=args.temperature,
|
118 |
-
top_p=args.top_p,
|
119 |
-
num_beams=args.num_beams,
|
120 |
-
max_new_tokens=args.max_new_tokens,
|
121 |
-
use_cache=True,
|
122 |
-
stopping_criteria=[stopping_criteria],
|
123 |
-
)
|
124 |
-
|
125 |
-
input_token_len = input_ids.shape[1]
|
126 |
-
n_diff_input_output = (input_ids != output_ids[:, :input_token_len]).sum().item()
|
127 |
-
if n_diff_input_output > 0:
|
128 |
-
print(
|
129 |
-
f"[Warning] {n_diff_input_output} output_ids are not the same as the input_ids"
|
130 |
-
)
|
131 |
-
outputs = tokenizer.batch_decode(
|
132 |
-
output_ids[:, input_token_len:], skip_special_tokens=True
|
133 |
-
)[0]
|
134 |
-
outputs = outputs.strip()
|
135 |
-
if outputs.endswith(stop_str):
|
136 |
-
outputs = outputs[: -len(stop_str)]
|
137 |
-
outputs = outputs.strip()
|
138 |
-
print(outputs)
|
139 |
-
|
140 |
-
|
141 |
-
if __name__ == "__main__":
|
142 |
-
parser = argparse.ArgumentParser()
|
143 |
-
parser.add_argument("--model-path", type=str, default="facebook/opt-350m")
|
144 |
-
parser.add_argument("--model-base", type=str, default=None)
|
145 |
-
parser.add_argument("--image-file", type=str, required=True)
|
146 |
-
parser.add_argument("--query", type=str, required=True)
|
147 |
-
parser.add_argument("--conv-mode", type=str, default=None)
|
148 |
-
parser.add_argument("--sep", type=str, default=",")
|
149 |
-
parser.add_argument("--temperature", type=float, default=0.2)
|
150 |
-
parser.add_argument("--top_p", type=float, default=None)
|
151 |
-
parser.add_argument("--num_beams", type=int, default=1)
|
152 |
-
parser.add_argument("--max_new_tokens", type=int, default=512)
|
153 |
-
args = parser.parse_args()
|
154 |
-
|
155 |
-
eval_model(args)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
LLAVA_Biovil/llava/eval/summarize_gpt_review.py
DELETED
@@ -1,60 +0,0 @@
|
|
1 |
-
import json
|
2 |
-
import os
|
3 |
-
from collections import defaultdict
|
4 |
-
|
5 |
-
import numpy as np
|
6 |
-
|
7 |
-
import argparse
|
8 |
-
|
9 |
-
def parse_args():
|
10 |
-
parser = argparse.ArgumentParser(description='ChatGPT-based QA evaluation.')
|
11 |
-
parser.add_argument('-d', '--dir', default=None)
|
12 |
-
parser.add_argument('-v', '--version', default=None)
|
13 |
-
parser.add_argument('-s', '--select', nargs='*', default=None)
|
14 |
-
parser.add_argument('-f', '--files', nargs='*', default=[])
|
15 |
-
parser.add_argument('-i', '--ignore', nargs='*', default=[])
|
16 |
-
return parser.parse_args()
|
17 |
-
|
18 |
-
|
19 |
-
if __name__ == '__main__':
|
20 |
-
args = parse_args()
|
21 |
-
|
22 |
-
if args.ignore is not None:
|
23 |
-
args.ignore = [int(x) for x in args.ignore]
|
24 |
-
|
25 |
-
if len(args.files) > 0:
|
26 |
-
review_files = args.files
|
27 |
-
else:
|
28 |
-
review_files = [x for x in os.listdir(args.dir) if x.endswith('.jsonl') and (x.startswith('gpt4_text') or x.startswith('reviews_') or x.startswith('review_') or 'review' in args.dir)]
|
29 |
-
|
30 |
-
for review_file in sorted(review_files):
|
31 |
-
config = os.path.basename(review_file).replace('gpt4_text_', '').replace('.jsonl', '')
|
32 |
-
if args.select is not None and any(x not in config for x in args.select):
|
33 |
-
continue
|
34 |
-
if '0613' in config:
|
35 |
-
version = '0613'
|
36 |
-
else:
|
37 |
-
version = '0314'
|
38 |
-
if args.version is not None and args.version != version:
|
39 |
-
continue
|
40 |
-
scores = defaultdict(list)
|
41 |
-
print(config)
|
42 |
-
with open(os.path.join(args.dir, review_file) if args.dir is not None else review_file) as f:
|
43 |
-
for review_str in f:
|
44 |
-
review = json.loads(review_str)
|
45 |
-
if review['question_id'] in args.ignore:
|
46 |
-
continue
|
47 |
-
if 'category' in review:
|
48 |
-
scores[review['category']].append(review['tuple'])
|
49 |
-
scores['all'].append(review['tuple'])
|
50 |
-
else:
|
51 |
-
if 'tuple' in review:
|
52 |
-
scores['all'].append(review['tuple'])
|
53 |
-
else:
|
54 |
-
scores['all'].append(review['score'])
|
55 |
-
for k, v in sorted(scores.items()):
|
56 |
-
stats = np.asarray(v).mean(0).tolist()
|
57 |
-
stats = [round(x, 3) for x in stats]
|
58 |
-
# print(k, stats, round(stats[1]/stats[0]*100, 1))
|
59 |
-
print(k, round(stats[1]/stats[0]*100, 1), round(stats[0] * 10, 1), round(stats[1] * 10, 1))
|
60 |
-
print('=================================')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
LLAVA_Biovil/llava/eval/webpage/figures/alpaca.png
DELETED
Binary file (96.1 kB)
|
|
LLAVA_Biovil/llava/eval/webpage/figures/bard.jpg
DELETED
Binary file (15.3 kB)
|
|
LLAVA_Biovil/llava/eval/webpage/figures/chatgpt.svg
DELETED
LLAVA_Biovil/llava/eval/webpage/figures/llama.jpg
DELETED
Binary file (56.5 kB)
|
|
LLAVA_Biovil/llava/eval/webpage/figures/swords_FILL0_wght300_GRAD0_opsz48.svg
DELETED
LLAVA_Biovil/llava/eval/webpage/figures/vicuna.jpeg
DELETED
Binary file (54 kB)
|
|
LLAVA_Biovil/llava/eval/webpage/index.html
DELETED
@@ -1,162 +0,0 @@
|
|
1 |
-
<!DOCTYPE html>
|
2 |
-
<html lang="en">
|
3 |
-
<head>
|
4 |
-
<meta charset="UTF-8">
|
5 |
-
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
6 |
-
<title>Who's GPT-4's favorite? Battles between State-of-the-Art Chatbots</title>
|
7 |
-
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.5.2/css/bootstrap.min.css">
|
8 |
-
<link rel="stylesheet" href="https://fonts.googleapis.com/icon?family=Material+Icons">
|
9 |
-
<link rel="stylesheet" href="styles.css">
|
10 |
-
</head>
|
11 |
-
|
12 |
-
<body>
|
13 |
-
<nav class="navbar navbar-expand-lg navbar-dark bg-dark">
|
14 |
-
<a class="navbar-brand" href="#">🏔️ Vicuna Evaluation Examples</a>
|
15 |
-
<button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbarNav" aria-controls="navbarNav" aria-expanded="false" aria-label="Toggle navigation">
|
16 |
-
<span class="navbar-toggler-icon"></span>
|
17 |
-
</button>
|
18 |
-
<div class="collapse navbar-collapse" id="navbarNav">
|
19 |
-
<ul class="navbar-nav mr-auto">
|
20 |
-
<li class="nav-item">
|
21 |
-
<a class="nav-link" href="https://chat.lmsys.org/">Demo</a>
|
22 |
-
</li>
|
23 |
-
<li class="nav-item">
|
24 |
-
<a class="nav-link" href="https://vicuna.lmsys.org">Blog</a>
|
25 |
-
</li>
|
26 |
-
<li class="nav-item">
|
27 |
-
<a class="nav-link" href="https://github.com/lm-sys/FastChat">Github</a>
|
28 |
-
</li>
|
29 |
-
</ul>
|
30 |
-
</div>
|
31 |
-
</nav>
|
32 |
-
|
33 |
-
<div class="container mt-5">
|
34 |
-
<h2 class="text-center mb-5">Who's GPT-4's favorite? Battles between State-of-the-Art Chatbots</h2>
|
35 |
-
|
36 |
-
<!-- Selection -->
|
37 |
-
<div class="form-row">
|
38 |
-
<div class="form-group col-md-2">
|
39 |
-
<label for="category-select">Category</label>
|
40 |
-
<select class="form-control" id="category-select"></select>
|
41 |
-
</div>
|
42 |
-
<div class="form-group col-md-8">
|
43 |
-
<label for="question-select">Question</label>
|
44 |
-
<select class="form-control" id="question-select"></select>
|
45 |
-
</div>
|
46 |
-
<div class="form-group col-md-2">
|
47 |
-
<div class="col-md-2"><label> </label></div>
|
48 |
-
<div class="btn-group" role="group" aria-label="Left and Right Controller">
|
49 |
-
<button type="button" class="form-control btn btn-primary" id="prev-question"><i class="material-icons">keyboard_arrow_left</i></button>
|
50 |
-
<button type="button" class="form-control btn btn-primary" id="next-question"><i class="material-icons">keyboard_arrow_right</i></button>
|
51 |
-
</div>
|
52 |
-
</div>
|
53 |
-
</div>
|
54 |
-
|
55 |
-
<!-- "Battle" -->
|
56 |
-
<div class="row mb-4" style="justify-content: center;">
|
57 |
-
<div class="col" style="display: flex; justify-content: center; align-items: center;">
|
58 |
-
<label class="adjustable-font-size" id="other-score-label">*/10</label>
|
59 |
-
</div>
|
60 |
-
<div class="col">
|
61 |
-
<div class="vertical-flex-layout">
|
62 |
-
<img class="shadow figure-img img-fluid" src="" alt="other logo" width="150" id="other-model-figure">
|
63 |
-
</div>
|
64 |
-
</div>
|
65 |
-
<div class="col">
|
66 |
-
<div class="vertical-flex-layout">
|
67 |
-
<!-- from: https://fonts.google.com/icons?icon.query=battle&selected=Material+Symbols+Outlined:swords:FILL@0;wght@300;GRAD@0;opsz@48&icon.style=Outlined -->
|
68 |
-
<img class="figure-img img-fluid" src="figures/swords_FILL0_wght300_GRAD0_opsz48.svg" width="60" height="60">
|
69 |
-
</div>
|
70 |
-
</div>
|
71 |
-
<div class="col">
|
72 |
-
<div class="vertical-flex-layout">
|
73 |
-
<img class="shadow figure-img img-fluid" src="figures/vicuna.jpeg" alt="vicuna logo" width="150" id="our-model-figure">
|
74 |
-
</div>
|
75 |
-
</div>
|
76 |
-
<div class="col" style="display: flex; justify-content: center; align-items: center;">
|
77 |
-
<label class="adjustable-font-size" id="our-score-label">*/10</label>
|
78 |
-
</div>
|
79 |
-
</div>
|
80 |
-
|
81 |
-
<!-- Question Card -->
|
82 |
-
<div class="card mb-4">
|
83 |
-
<div class="card-body" id="selected-question"></div>
|
84 |
-
</div>
|
85 |
-
|
86 |
-
<!-- Answer Cards -->
|
87 |
-
<div class="row">
|
88 |
-
<div class="col-md-6">
|
89 |
-
<div class="card mb-4 expandable-card">
|
90 |
-
<div class="card-header" style="padding-bottom: 0.2rem" id="other-model-header-bg">
|
91 |
-
<div class="row">
|
92 |
-
<div class="col-md-5" style="align-items: center; display: flex;">
|
93 |
-
<label id="other-model-header">Assistant #1</label>
|
94 |
-
</div>
|
95 |
-
<div class="col-md-7">
|
96 |
-
<select class="form-control" id="model-select" style="height: fit-content; margin-top: -0.3rem;"></select>
|
97 |
-
</div>
|
98 |
-
</div>
|
99 |
-
</div>
|
100 |
-
<div class="card-body">
|
101 |
-
<div class="card-text-container">
|
102 |
-
<div class="card-text" id="other-model-answer"></div>
|
103 |
-
</div>
|
104 |
-
<div class="btn btn-primary expand-btn" style="display:flex;"></div>
|
105 |
-
</div>
|
106 |
-
</div>
|
107 |
-
</div>
|
108 |
-
<div class="col-md-6">
|
109 |
-
<div class="card mb-4 expandable-card">
|
110 |
-
<div class="card-header" id="our-model-header">
|
111 |
-
Assistant #2 (Vicuna, our model)
|
112 |
-
</div>
|
113 |
-
<div class="card-body">
|
114 |
-
<div class="card-text-container">
|
115 |
-
<div class="card-text" id="our-model-answer"></div>
|
116 |
-
</div>
|
117 |
-
<div class="btn btn-primary expand-btn" style="display:flex;"></div>
|
118 |
-
</div>
|
119 |
-
</div>
|
120 |
-
</div>
|
121 |
-
</div>
|
122 |
-
|
123 |
-
<!-- Evaluation -->
|
124 |
-
<div class="card expandable-card">
|
125 |
-
<div class="card-header" style="background-color: #c9c9f2;" id="evaluation-header">GPT-4 Evaluation</div>
|
126 |
-
<div class="card-body">
|
127 |
-
<div class="card-text-container">
|
128 |
-
<div class="card-text" id="evaluation-result"></div>
|
129 |
-
</div>
|
130 |
-
<div class="btn btn-primary expand-btn" style="display:flex;"></div>
|
131 |
-
</div>
|
132 |
-
</div>
|
133 |
-
</div>
|
134 |
-
|
135 |
-
<div class="container-fluid bg-light py-2">
|
136 |
-
<div class="text-center">
|
137 |
-
<small class="text-muted">This website is co-authored with <a href="https://openai.com" target="_blank">GPT-4</a>.</small>
|
138 |
-
</div>
|
139 |
-
</div>
|
140 |
-
|
141 |
-
<!-- Marked.js -->
|
142 |
-
<script src="https://cdn.jsdelivr.net/npm/[email protected]/lib/marked.umd.min.js"></script>
|
143 |
-
<!-- Bootstrap and Popper.js JavaScript dependencies -->
|
144 |
-
<script src="https://code.jquery.com/jquery-3.5.1.slim.min.js"></script>
|
145 |
-
<script src="https://cdn.jsdelivr.net/npm/@popperjs/[email protected]/dist/umd/popper.min.js"></script>
|
146 |
-
<script src="https://maxcdn.bootstrapcdn.com/bootstrap/4.5.2/js/bootstrap.min.js"></script>
|
147 |
-
|
148 |
-
<script src="script.js"></script>
|
149 |
-
<script>
|
150 |
-
// Fetch the JSON file
|
151 |
-
fetch('data.json')
|
152 |
-
.then(response => response.json())
|
153 |
-
.then(json_data => {
|
154 |
-
// Populate the models and questions.
|
155 |
-
populateModels(json_data.models);
|
156 |
-
populateQuestions(json_data.questions);
|
157 |
-
displayQuestion(currentQuestionIndex);
|
158 |
-
}).catch(error => console.error(error));
|
159 |
-
</script>
|
160 |
-
</body>
|
161 |
-
|
162 |
-
</html>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
LLAVA_Biovil/llava/eval/webpage/script.js
DELETED
@@ -1,245 +0,0 @@
|
|
1 |
-
// Description: Script for the evaluation webpage.
|
2 |
-
|
3 |
-
let currentQuestionIndex = 1;
|
4 |
-
|
5 |
-
// Store the model name mapping for later use.
|
6 |
-
modelNameMapping = {
|
7 |
-
"gpt35": "ChatGPT-3.5",
|
8 |
-
"gpt4": "GPT-4",
|
9 |
-
"alpaca": "Alpaca-13b",
|
10 |
-
"vicuna": "Vicuna-13b",
|
11 |
-
"llama": "LLaMA-13b",
|
12 |
-
"bard": "Bard",
|
13 |
-
};
|
14 |
-
|
15 |
-
modelFigureMapping = {
|
16 |
-
"vicuna": "figures/vicuna.jpeg",
|
17 |
-
// Image from: https://commons.wikimedia.org/wiki/File:ChatGPT_logo.svg
|
18 |
-
"gpt35": "figures/chatgpt.svg",
|
19 |
-
// Image from: https://www.reddit.com/r/logodesign/comments/1128aat/google_ai_bard_logo_design/
|
20 |
-
"bard": "figures/bard.jpg",
|
21 |
-
// Image from: https://crfm.stanford.edu/2023/03/13/alpaca.html
|
22 |
-
"alpaca": "figures/alpaca.png",
|
23 |
-
// Image adapted from https://commons.wikimedia.org/wiki/File:Llama_on_Machu_Picchu.jpg
|
24 |
-
"llama": "figures/llama.jpg",
|
25 |
-
}
|
26 |
-
|
27 |
-
// Store the question data in a mapping for later use.
|
28 |
-
questionMapping = {};
|
29 |
-
// Store the question ids in a mapping for later use.
|
30 |
-
categoryMapping = {};
|
31 |
-
// Store the number of questions for later use.
|
32 |
-
questionsCount = 0;
|
33 |
-
|
34 |
-
|
35 |
-
function text2Markdown(text) {
|
36 |
-
// Normalize the text for markdown rendering.
|
37 |
-
text = text.trim().replaceAll('\n\n', '\n').replaceAll('\n', '\n\n');
|
38 |
-
return marked.parse(text);
|
39 |
-
}
|
40 |
-
|
41 |
-
function capitalizeFirstChar(str) {
|
42 |
-
if (!str || str.length === 0) {
|
43 |
-
return str;
|
44 |
-
}
|
45 |
-
return str.charAt(0).toUpperCase() + str.slice(1);
|
46 |
-
}
|
47 |
-
|
48 |
-
function updateQuestionSelect(question_id) {
|
49 |
-
const select = document.getElementById('question-select');
|
50 |
-
// Clear the question select.
|
51 |
-
select.innerHTML = '';
|
52 |
-
// Populate the question select.
|
53 |
-
category = questionMapping[question_id].category;
|
54 |
-
categoryMapping[category].forEach(question_id => {
|
55 |
-
const question = questionMapping[question_id];
|
56 |
-
const option = document.createElement('option');
|
57 |
-
option.value = question_id;
|
58 |
-
option.textContent = 'Q' + question_id.toString() + ': ' + question.question;
|
59 |
-
select.appendChild(option);
|
60 |
-
});
|
61 |
-
select.value = question_id;
|
62 |
-
}
|
63 |
-
|
64 |
-
function updateModelSelect() {
|
65 |
-
const select = document.getElementById('model-select');
|
66 |
-
img_path = modelFigureMapping[select.value];
|
67 |
-
document.getElementById('other-model-figure').src = img_path;
|
68 |
-
}
|
69 |
-
|
70 |
-
function populateModels(models) {
|
71 |
-
const select = document.getElementById('model-select');
|
72 |
-
models.forEach(model => {
|
73 |
-
const option = document.createElement('option');
|
74 |
-
option.value = model;
|
75 |
-
option.textContent = modelNameMapping[model];
|
76 |
-
select.appendChild(option);
|
77 |
-
});
|
78 |
-
updateModelSelect();
|
79 |
-
}
|
80 |
-
|
81 |
-
function populateQuestions(questions) {
|
82 |
-
const category_select = document.getElementById('category-select');
|
83 |
-
|
84 |
-
questionsCount = questions.length;
|
85 |
-
questions.forEach(question => {
|
86 |
-
const option = document.createElement('option');
|
87 |
-
// Store the question data in a mapping for later use.
|
88 |
-
questionMapping[question.id] = {
|
89 |
-
category: question.category,
|
90 |
-
question: question.question,
|
91 |
-
answers: question.answers,
|
92 |
-
evaluations: question.evaluations,
|
93 |
-
scores: question.scores,
|
94 |
-
};
|
95 |
-
// Store the question id in the category mapping.
|
96 |
-
if (question.category in categoryMapping) {
|
97 |
-
categoryMapping[question.category].push(question.id);
|
98 |
-
} else {
|
99 |
-
categoryMapping[question.category] = [question.id];
|
100 |
-
const category_option = document.createElement('option');
|
101 |
-
category_option.value = question.category;
|
102 |
-
category_option.textContent = capitalizeFirstChar(question.category);
|
103 |
-
category_select.appendChild(category_option);
|
104 |
-
}
|
105 |
-
});
|
106 |
-
// Set the default category.
|
107 |
-
updateQuestionSelect(currentQuestionIndex);
|
108 |
-
}
|
109 |
-
|
110 |
-
function displayQuestion(index) {
|
111 |
-
const question = questionMapping[index].question;
|
112 |
-
document.getElementById('selected-question').innerHTML = text2Markdown('**Question:** ' + question);
|
113 |
-
displayAnswers(index);
|
114 |
-
}
|
115 |
-
|
116 |
-
function displayAnswers(index) {
|
117 |
-
const question = questionMapping[index];
|
118 |
-
const otherModel = document.getElementById('model-select').value;
|
119 |
-
// render the answers with markdown
|
120 |
-
document.getElementById('other-model-answer').innerHTML = text2Markdown(question.answers[otherModel]);
|
121 |
-
document.getElementById('our-model-answer').innerHTML = text2Markdown(question.answers.vicuna);
|
122 |
-
|
123 |
-
// Display evaluation
|
124 |
-
score = question.scores[otherModel];
|
125 |
-
score_text = modelNameMapping[otherModel] + " " + score[0] + "/10, Vicuna-13b " + score[1] + "/10";
|
126 |
-
document.getElementById('evaluation-header').textContent = "GPT-4 Evaluation" + " (Score: " + score_text + ")";
|
127 |
-
document.getElementById('evaluation-result').innerHTML = text2Markdown(question.evaluations[otherModel]);
|
128 |
-
|
129 |
-
// Update model names
|
130 |
-
let assistant1_title = "Assistant #1"; // (" + modelNameMapping[otherModel] + ")";
|
131 |
-
let assistant2_title = "Assistant #2 (Vicuna-13b, our model)";
|
132 |
-
// Update scores/labels.
|
133 |
-
let assistant1_score_label = score[0].toString() + '/10';
|
134 |
-
let assistant2_score_label = score[1].toString() + '/10';
|
135 |
-
|
136 |
-
const colorRed ='#fa9'; // '#eb978d';
|
137 |
-
// const colorGreen = '#c9f2c9';
|
138 |
-
const colorBlue = '#8ef'; // '#71dbf9';
|
139 |
-
const colorYellow = '#fe7'; // '#fada57';
|
140 |
-
let otherModelHeaderColor = '';
|
141 |
-
let ourModelHeaderColor = '';
|
142 |
-
// Update the winner.
|
143 |
-
if (score[0] == score[1]) {
|
144 |
-
assistant1_title = '🏆 ' + assistant1_title;
|
145 |
-
assistant1_score_label = '🏆 ' + assistant1_score_label;
|
146 |
-
assistant2_title = '🏆 ' + assistant2_title;
|
147 |
-
assistant2_score_label = '🏆 ' + assistant2_score_label;
|
148 |
-
otherModelHeaderColor = colorYellow;
|
149 |
-
ourModelHeaderColor = colorYellow;
|
150 |
-
} else if (score[0] > score[1]) {
|
151 |
-
assistant1_title = '🏆 ' + assistant1_title;
|
152 |
-
assistant1_score_label = '🏆 ' + assistant1_score_label;
|
153 |
-
otherModelHeaderColor = colorBlue;
|
154 |
-
ourModelHeaderColor = colorRed;
|
155 |
-
} else if (score[0] < score[1]) {
|
156 |
-
assistant2_title = '🏆 ' + assistant2_title;
|
157 |
-
assistant2_score_label = '🏆 ' + assistant2_score_label;
|
158 |
-
otherModelHeaderColor = colorRed;
|
159 |
-
ourModelHeaderColor = colorBlue;
|
160 |
-
}
|
161 |
-
|
162 |
-
document.getElementById('other-model-header-bg').style.backgroundColor = otherModelHeaderColor;
|
163 |
-
document.getElementById('our-model-header').style.backgroundColor = ourModelHeaderColor;
|
164 |
-
|
165 |
-
document.getElementById('other-model-header').textContent = assistant1_title;
|
166 |
-
document.getElementById('our-model-header').textContent = assistant2_title;
|
167 |
-
|
168 |
-
document.getElementById('other-score-label').textContent = assistant1_score_label;
|
169 |
-
document.getElementById('our-score-label').textContent = assistant2_score_label;
|
170 |
-
|
171 |
-
// Update expand buttons visibility for both cards after displaying answers
|
172 |
-
// Reset the expanded state and update expand buttons visibility for both cards after displaying answers
|
173 |
-
document.querySelectorAll('.expandable-card').forEach(card => {
|
174 |
-
card.classList.remove('expanded');
|
175 |
-
updateExpandButtonVisibility(card);
|
176 |
-
const expandBtn = card.querySelector('.expand-btn');
|
177 |
-
expandBtn.innerHTML = '<i class="material-icons" style="pointer-events: none">keyboard_arrow_down</i> Show more'; // .textContent = 'Show more';
|
178 |
-
});
|
179 |
-
}
|
180 |
-
|
181 |
-
document.getElementById('question-select').addEventListener('change', e => {
|
182 |
-
currentQuestionIndex = parseInt(e.target.value);
|
183 |
-
displayQuestion(currentQuestionIndex);
|
184 |
-
});
|
185 |
-
|
186 |
-
document.getElementById('category-select').addEventListener('change', e => {
|
187 |
-
let currentCategory = e.target.value;
|
188 |
-
const questionIds = categoryMapping[currentCategory];
|
189 |
-
currentQuestionIndex = questionIds[0];
|
190 |
-
updateQuestionSelect(currentQuestionIndex);
|
191 |
-
displayQuestion(currentQuestionIndex);
|
192 |
-
});
|
193 |
-
|
194 |
-
// Update expand buttons whenever the model is changed
|
195 |
-
document.getElementById('model-select').addEventListener('change', () => {
|
196 |
-
displayAnswers(currentQuestionIndex);
|
197 |
-
document.querySelectorAll('.expandable-card').forEach(card => {
|
198 |
-
updateExpandButtonVisibility(card);
|
199 |
-
});
|
200 |
-
updateModelSelect();
|
201 |
-
});
|
202 |
-
|
203 |
-
function switchQuestionAndCategory() {
|
204 |
-
document.getElementById('question-select').value = currentQuestionIndex;
|
205 |
-
old_category = document.getElementById('category-select').value;
|
206 |
-
new_category = questionMapping[currentQuestionIndex].category;
|
207 |
-
if (old_category != new_category) {
|
208 |
-
document.getElementById('category-select').value = new_category;
|
209 |
-
updateQuestionSelect(currentQuestionIndex);
|
210 |
-
}
|
211 |
-
displayQuestion(currentQuestionIndex);
|
212 |
-
}
|
213 |
-
|
214 |
-
document.getElementById('prev-question').addEventListener('click', () => {
|
215 |
-
// Question index starts from 1.
|
216 |
-
currentQuestionIndex = Math.max(1, currentQuestionIndex - 1);
|
217 |
-
switchQuestionAndCategory();
|
218 |
-
});
|
219 |
-
|
220 |
-
document.getElementById('next-question').addEventListener('click', () => {
|
221 |
-
// Question index starts from 1.
|
222 |
-
currentQuestionIndex = Math.min(questionsCount, currentQuestionIndex + 1);
|
223 |
-
switchQuestionAndCategory();
|
224 |
-
});
|
225 |
-
|
226 |
-
function updateExpandButtonVisibility(card) {
|
227 |
-
const cardTextContainer = card.querySelector('.card-text-container');
|
228 |
-
const expandBtn = card.querySelector('.expand-btn');
|
229 |
-
if (cardTextContainer.scrollHeight > cardTextContainer.offsetHeight) {
|
230 |
-
expandBtn.style.display = 'flex';
|
231 |
-
} else {
|
232 |
-
expandBtn.style.display = 'none';
|
233 |
-
card.classList.add('expanded');
|
234 |
-
}
|
235 |
-
}
|
236 |
-
|
237 |
-
document.querySelectorAll('.expand-btn').forEach(btn => {
|
238 |
-
btn.addEventListener('click', e => {
|
239 |
-
const card = e.target.closest('.expandable-card');
|
240 |
-
card.classList.toggle('expanded');
|
241 |
-
const more = '<i class="material-icons" style="pointer-events: none">keyboard_arrow_down</i> Show more';
|
242 |
-
const less = '<i class="material-icons" style="pointer-events: none">keyboard_arrow_up</i> Show less';
|
243 |
-
e.target.innerHTML = card.classList.contains('expanded') ? less : more;
|
244 |
-
});
|
245 |
-
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
LLAVA_Biovil/llava/eval/webpage/styles.css
DELETED
@@ -1,105 +0,0 @@
|
|
1 |
-
body {
|
2 |
-
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
|
3 |
-
background-color: #f8f9fa;
|
4 |
-
}
|
5 |
-
|
6 |
-
.navbar-dark .navbar-nav .nav-link {
|
7 |
-
color: #f1cf68;
|
8 |
-
font-size: 1.1rem;
|
9 |
-
padding: 0.5rem 0.6rem;
|
10 |
-
}
|
11 |
-
|
12 |
-
.card-header {
|
13 |
-
font-weight: bold;
|
14 |
-
}
|
15 |
-
|
16 |
-
.card {
|
17 |
-
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
|
18 |
-
transition: 0.3s;
|
19 |
-
}
|
20 |
-
|
21 |
-
.card:hover {
|
22 |
-
box-shadow: 0 8px 16px rgba(0, 0, 0, 0.2);
|
23 |
-
}
|
24 |
-
|
25 |
-
button {
|
26 |
-
transition: background-color 0.3s;
|
27 |
-
}
|
28 |
-
|
29 |
-
button:hover {
|
30 |
-
background-color: #007bff;
|
31 |
-
}
|
32 |
-
|
33 |
-
@media (max-width: 767px) {
|
34 |
-
.form-row .form-group {
|
35 |
-
margin-bottom: 10px;
|
36 |
-
}
|
37 |
-
}
|
38 |
-
|
39 |
-
/* Extra styles */
|
40 |
-
|
41 |
-
.expandable-card .card-text-container {
|
42 |
-
max-height: 200px;
|
43 |
-
overflow-y: hidden;
|
44 |
-
position: relative;
|
45 |
-
}
|
46 |
-
|
47 |
-
.expandable-card.expanded .card-text-container {
|
48 |
-
max-height: none;
|
49 |
-
}
|
50 |
-
|
51 |
-
.expand-btn {
|
52 |
-
position: relative;
|
53 |
-
display: none;
|
54 |
-
background-color: rgba(255, 255, 255, 0.8);
|
55 |
-
color: #510c75;
|
56 |
-
border-color: transparent;
|
57 |
-
}
|
58 |
-
|
59 |
-
.expand-btn:hover {
|
60 |
-
background-color: rgba(200, 200, 200, 0.8);
|
61 |
-
text-decoration: none;
|
62 |
-
border-color: transparent;
|
63 |
-
color: #510c75;
|
64 |
-
}
|
65 |
-
|
66 |
-
.expand-btn:focus {
|
67 |
-
outline: none;
|
68 |
-
text-decoration: none;
|
69 |
-
}
|
70 |
-
|
71 |
-
.expandable-card:not(.expanded) .card-text-container:after {
|
72 |
-
content: "";
|
73 |
-
position: absolute;
|
74 |
-
bottom: 0;
|
75 |
-
left: 0;
|
76 |
-
width: 100%;
|
77 |
-
height: 90px;
|
78 |
-
background: linear-gradient(rgba(255, 255, 255, 0.2), rgba(255, 255, 255, 1));
|
79 |
-
}
|
80 |
-
|
81 |
-
.expandable-card:not(.expanded) .expand-btn {
|
82 |
-
margin-top: -40px;
|
83 |
-
}
|
84 |
-
|
85 |
-
.card-body {
|
86 |
-
padding-bottom: 5px;
|
87 |
-
}
|
88 |
-
|
89 |
-
.vertical-flex-layout {
|
90 |
-
justify-content: center;
|
91 |
-
align-items: center;
|
92 |
-
height: 100%;
|
93 |
-
display: flex;
|
94 |
-
flex-direction: column;
|
95 |
-
gap: 5px;
|
96 |
-
}
|
97 |
-
|
98 |
-
.figure-img {
|
99 |
-
max-width: 100%;
|
100 |
-
height: auto;
|
101 |
-
}
|
102 |
-
|
103 |
-
.adjustable-font-size {
|
104 |
-
font-size: calc(0.5rem + 2vw);
|
105 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
LLAVA_Biovil/llava/mm_utils.py
CHANGED
@@ -5,7 +5,7 @@
|
|
5 |
|
6 |
import torch
|
7 |
from transformers import StoppingCriteria
|
8 |
-
from llava.constants import IMAGE_TOKEN_INDEX
|
9 |
|
10 |
|
11 |
def load_image_from_base64(image):
|
|
|
5 |
|
6 |
import torch
|
7 |
from transformers import StoppingCriteria
|
8 |
+
from LLAVA_Biovil.llava.constants import IMAGE_TOKEN_INDEX
|
9 |
|
10 |
|
11 |
def load_image_from_base64(image):
|
LLAVA_Biovil/llava/model/apply_delta.py
CHANGED
@@ -7,7 +7,7 @@
|
|
7 |
import torch
|
8 |
from tqdm import tqdm
|
9 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
10 |
-
from llava import LlavaLlamaForCausalLM
|
11 |
|
12 |
|
13 |
def apply_delta(base_model_path, target_model_path, delta_path):
|
|
|
7 |
import torch
|
8 |
from tqdm import tqdm
|
9 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
10 |
+
from LLAVA_Biovil.llava import LlavaLlamaForCausalLM
|
11 |
|
12 |
|
13 |
def apply_delta(base_model_path, target_model_path, delta_path):
|
LLAVA_Biovil/llava/model/builder.py
CHANGED
@@ -20,17 +20,17 @@
|
|
20 |
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig, BitsAndBytesConfig
|
21 |
import torch
|
22 |
|
23 |
-
from
|
24 |
-
from
|
25 |
-
from
|
26 |
-
from
|
27 |
|
28 |
try:
|
29 |
-
from
|
30 |
-
from
|
31 |
except:
|
32 |
-
from llava.model import *
|
33 |
-
from llava.constants import DEFAULT_IMAGE_PATCH_TOKEN, DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN
|
34 |
|
35 |
|
36 |
def load_pretrained_model(model_path, model_base, model_name, load_8bit=False, load_4bit=False, device_map="auto", device="cuda", **kwargs):
|
|
|
20 |
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig, BitsAndBytesConfig
|
21 |
import torch
|
22 |
|
23 |
+
from LLAVA_Biovil.biovil_t.model import ImageModel
|
24 |
+
from LLAVA_Biovil.biovil_t.pretrained import _download_biovil_t_image_model_weights
|
25 |
+
from LLAVA_Biovil.biovil_t.types import ImageEncoderType
|
26 |
+
from LLAVA_Biovil.llava.model.multimodal_projector.builder import build_vision_projector
|
27 |
|
28 |
try:
|
29 |
+
from LLAVA_Biovil.llava.model import *
|
30 |
+
from LLAVA_Biovil.llava.constants import DEFAULT_IMAGE_PATCH_TOKEN, DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN
|
31 |
except:
|
32 |
+
from LLAVA_Biovil.llava.model import *
|
33 |
+
from LLAVA_Biovil.llava.constants import DEFAULT_IMAGE_PATCH_TOKEN, DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN
|
34 |
|
35 |
|
36 |
def load_pretrained_model(model_path, model_base, model_name, load_8bit=False, load_4bit=False, device_map="auto", device="cuda", **kwargs):
|
LLAVA_Biovil/llava/model/consolidate.py
CHANGED
@@ -6,7 +6,7 @@
|
|
6 |
|
7 |
import torch
|
8 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
9 |
-
from
|
10 |
|
11 |
|
12 |
def consolidate_ckpt(src_path, dst_path):
|
|
|
6 |
|
7 |
import torch
|
8 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
9 |
+
from LLAVA_Biovil.llava.model.utils import auto_upgrade
|
10 |
|
11 |
|
12 |
def consolidate_ckpt(src_path, dst_path):
|
LLAVA_Biovil/llava/model/language_model/llava_llama.py
CHANGED
@@ -25,7 +25,7 @@
|
|
25 |
|
26 |
from transformers.modeling_outputs import CausalLMOutputWithPast
|
27 |
|
28 |
-
from
|
29 |
|
30 |
|
31 |
class LlavaConfig(LlamaConfig):
|
|
|
25 |
|
26 |
from transformers.modeling_outputs import CausalLMOutputWithPast
|
27 |
|
28 |
+
from LLAVA_Biovil.llava.llava_arch import LlavaMetaModel, LlavaMetaForCausalLM
|
29 |
|
30 |
|
31 |
class LlavaConfig(LlamaConfig):
|
LLAVA_Biovil/llava/model/language_model/llava_mpt.py
CHANGED
@@ -23,8 +23,8 @@
|
|
23 |
from transformers import AutoConfig, AutoModelForCausalLM
|
24 |
from transformers.modeling_outputs import CausalLMOutputWithPast
|
25 |
|
26 |
-
from .mpt.modeling_mpt import MPTConfig, MPTForCausalLM, MPTModel
|
27 |
-
from llava.model.llava_arch import LlavaMetaModel, LlavaMetaForCausalLM
|
28 |
|
29 |
|
30 |
class LlavaMPTConfig(MPTConfig):
|
|
|
23 |
from transformers import AutoConfig, AutoModelForCausalLM
|
24 |
from transformers.modeling_outputs import CausalLMOutputWithPast
|
25 |
|
26 |
+
from LLAVA_Biovil.llava.model.language_model.mpt.modeling_mpt import MPTConfig, MPTForCausalLM, MPTModel
|
27 |
+
from LLAVA_Biovil.llava.model.llava_arch import LlavaMetaModel, LlavaMetaForCausalLM
|
28 |
|
29 |
|
30 |
class LlavaMPTConfig(MPTConfig):
|
LLAVA_Biovil/llava/model/llava_arch.py
CHANGED
@@ -15,13 +15,13 @@
|
|
15 |
|
16 |
import torch
|
17 |
|
18 |
-
from biovil_t.model import ImageModel
|
19 |
-
from biovil_t.pretrained import _download_biovil_t_image_model_weights
|
20 |
-
from biovil_t.types import ImageEncoderType
|
21 |
-
from .multimodal_encoder.builder import build_vision_tower
|
22 |
-
from .multimodal_projector.builder import build_vision_projector, build_image_pooler
|
23 |
|
24 |
-
from llava.constants import IGNORE_INDEX, IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_PATCH_TOKEN, DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN
|
25 |
|
26 |
|
27 |
|
|
|
15 |
|
16 |
import torch
|
17 |
|
18 |
+
from LLAVA_Biovil.biovil_t.model import ImageModel
|
19 |
+
from LLAVA_Biovil.biovil_t.pretrained import _download_biovil_t_image_model_weights
|
20 |
+
from LLAVA_Biovil.biovil_t.types import ImageEncoderType
|
21 |
+
from LLAVA_Biovil.llava.multimodal_encoder.builder import build_vision_tower
|
22 |
+
from LLAVA_Biovil.llava.multimodal_projector.builder import build_vision_projector, build_image_pooler
|
23 |
|
24 |
+
from LLAVA_Biovil.llava.constants import IGNORE_INDEX, IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_PATCH_TOKEN, DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN
|
25 |
|
26 |
|
27 |
|
LLAVA_Biovil/llava/serve/__init__.py
DELETED
File without changes
|
LLAVA_Biovil/llava/serve/cli.py
DELETED
@@ -1,122 +0,0 @@
|
|
1 |
-
import argparse
|
2 |
-
import torch
|
3 |
-
|
4 |
-
from LLAV.llava.constants import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN, DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN
|
5 |
-
from LLAV.llava.conversation import conv_templates, SeparatorStyle
|
6 |
-
from LLAV.llava.model.builder import load_pretrained_model
|
7 |
-
from LLAV.llava.utils import disable_torch_init
|
8 |
-
from LLAV.llava.mm_utils import process_images, tokenizer_image_token, get_model_name_from_path, KeywordsStoppingCriteria
|
9 |
-
|
10 |
-
import requests
|
11 |
-
from PIL import Image
|
12 |
-
from io import BytesIO
|
13 |
-
from transformers import TextStreamer
|
14 |
-
|
15 |
-
|
16 |
-
def load_image(image_file):
|
17 |
-
if image_file.startswith('http://') or image_file.startswith('https://'):
|
18 |
-
response = requests.get(image_file)
|
19 |
-
image = Image.open(BytesIO(response.content)).convert('RGB')
|
20 |
-
else:
|
21 |
-
image = Image.open(image_file).convert('RGB')
|
22 |
-
return image
|
23 |
-
|
24 |
-
|
25 |
-
def main(args):
|
26 |
-
# Model
|
27 |
-
disable_torch_init()
|
28 |
-
|
29 |
-
model_name = get_model_name_from_path(args.model_path)
|
30 |
-
tokenizer, model, image_processor, context_len = load_pretrained_model(args.model_path, args.model_base, model_name, args.load_8bit, args.load_4bit, device=args.device)
|
31 |
-
|
32 |
-
if 'llama-2' in model_name.lower():
|
33 |
-
conv_mode = "llava_llama_2"
|
34 |
-
elif "v1" in model_name.lower():
|
35 |
-
conv_mode = "llava_v1"
|
36 |
-
elif "mpt" in model_name.lower():
|
37 |
-
conv_mode = "mpt"
|
38 |
-
else:
|
39 |
-
conv_mode = "llava_v0"
|
40 |
-
|
41 |
-
if args.conv_mode is not None and conv_mode != args.conv_mode:
|
42 |
-
print('[WARNING] the auto inferred conversation mode is {}, while `--conv-mode` is {}, using {}'.format(conv_mode, args.conv_mode, args.conv_mode))
|
43 |
-
else:
|
44 |
-
args.conv_mode = conv_mode
|
45 |
-
|
46 |
-
conv = conv_templates[args.conv_mode].copy()
|
47 |
-
if "mpt" in model_name.lower():
|
48 |
-
roles = ('user', 'assistant')
|
49 |
-
else:
|
50 |
-
roles = conv.roles
|
51 |
-
|
52 |
-
image = load_image(args.image_file)
|
53 |
-
# Similar operation in model_worker.py
|
54 |
-
image_tensor = process_images([image], image_processor, model.config)
|
55 |
-
if type(image_tensor) is list:
|
56 |
-
image_tensor = [image.to(model.device, dtype=torch.float16) for image in image_tensor]
|
57 |
-
else:
|
58 |
-
image_tensor = image_tensor.to(model.device, dtype=torch.float16)
|
59 |
-
|
60 |
-
while True:
|
61 |
-
try:
|
62 |
-
inp = input(f"{roles[0]}: ")
|
63 |
-
except EOFError:
|
64 |
-
inp = ""
|
65 |
-
if not inp:
|
66 |
-
print("exit...")
|
67 |
-
break
|
68 |
-
|
69 |
-
print(f"{roles[1]}: ", end="")
|
70 |
-
|
71 |
-
if image is not None:
|
72 |
-
# first message
|
73 |
-
if model.config.mm_use_im_start_end:
|
74 |
-
inp = DEFAULT_IM_START_TOKEN + DEFAULT_IMAGE_TOKEN + DEFAULT_IM_END_TOKEN + '\n' + inp
|
75 |
-
else:
|
76 |
-
inp = DEFAULT_IMAGE_TOKEN + '\n' + inp
|
77 |
-
conv.append_message(conv.roles[0], inp)
|
78 |
-
image = None
|
79 |
-
else:
|
80 |
-
# later messages
|
81 |
-
conv.append_message(conv.roles[0], inp)
|
82 |
-
conv.append_message(conv.roles[1], None)
|
83 |
-
prompt = conv.get_prompt()
|
84 |
-
|
85 |
-
input_ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors='pt').unsqueeze(0).to(model.device)
|
86 |
-
stop_str = conv.sep if conv.sep_style != SeparatorStyle.TWO else conv.sep2
|
87 |
-
keywords = [stop_str]
|
88 |
-
stopping_criteria = KeywordsStoppingCriteria(keywords, tokenizer, input_ids)
|
89 |
-
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
|
90 |
-
|
91 |
-
with torch.inference_mode():
|
92 |
-
output_ids = model.generate(
|
93 |
-
input_ids,
|
94 |
-
images=image_tensor,
|
95 |
-
do_sample=True if args.temperature > 0 else False,
|
96 |
-
temperature=args.temperature,
|
97 |
-
max_new_tokens=args.max_new_tokens,
|
98 |
-
streamer=streamer,
|
99 |
-
use_cache=True,
|
100 |
-
stopping_criteria=[stopping_criteria])
|
101 |
-
|
102 |
-
outputs = tokenizer.decode(output_ids[0, input_ids.shape[1]:]).strip()
|
103 |
-
conv.messages[-1][-1] = outputs
|
104 |
-
|
105 |
-
if args.debug:
|
106 |
-
print("\n", {"prompt": prompt, "outputs": outputs}, "\n")
|
107 |
-
|
108 |
-
|
109 |
-
if __name__ == "__main__":
|
110 |
-
parser = argparse.ArgumentParser()
|
111 |
-
parser.add_argument("--model-path", type=str, default="facebook/opt-350m")
|
112 |
-
parser.add_argument("--model-base", type=str, default=None)
|
113 |
-
parser.add_argument("--image-file", type=str, required=True)
|
114 |
-
parser.add_argument("--device", type=str, default="cuda")
|
115 |
-
parser.add_argument("--conv-mode", type=str, default=None)
|
116 |
-
parser.add_argument("--temperature", type=float, default=0.2)
|
117 |
-
parser.add_argument("--max-new-tokens", type=int, default=512)
|
118 |
-
parser.add_argument("--load-8bit", action="store_true")
|
119 |
-
parser.add_argument("--load-4bit", action="store_true")
|
120 |
-
parser.add_argument("--debug", action="store_true")
|
121 |
-
args = parser.parse_args()
|
122 |
-
main(args)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
LLAVA_Biovil/llava/serve/controller.py
DELETED
@@ -1,296 +0,0 @@
|
|
1 |
-
"""
|
2 |
-
A controller manages distributed workers.
|
3 |
-
It sends worker addresses to clients.
|
4 |
-
"""
|
5 |
-
import argparse
|
6 |
-
import dataclasses
|
7 |
-
from enum import Enum, auto
|
8 |
-
import json
|
9 |
-
import time
|
10 |
-
from typing import List
|
11 |
-
import threading
|
12 |
-
|
13 |
-
from fastapi import FastAPI, Request
|
14 |
-
from fastapi.responses import StreamingResponse
|
15 |
-
import numpy as np
|
16 |
-
import requests
|
17 |
-
import uvicorn
|
18 |
-
|
19 |
-
from llava.constants import CONTROLLER_HEART_BEAT_EXPIRATION
|
20 |
-
from llava.utils import build_logger, server_error_msg
|
21 |
-
|
22 |
-
|
23 |
-
logger = build_logger("controller", "controller.log")
|
24 |
-
|
25 |
-
|
26 |
-
class DispatchMethod(Enum):
|
27 |
-
LOTTERY = auto()
|
28 |
-
SHORTEST_QUEUE = auto()
|
29 |
-
|
30 |
-
@classmethod
|
31 |
-
def from_str(cls, name):
|
32 |
-
if name == "lottery":
|
33 |
-
return cls.LOTTERY
|
34 |
-
elif name == "shortest_queue":
|
35 |
-
return cls.SHORTEST_QUEUE
|
36 |
-
else:
|
37 |
-
raise ValueError(f"Invalid dispatch method")
|
38 |
-
|
39 |
-
|
40 |
-
@dataclasses.dataclass
|
41 |
-
class WorkerInfo:
|
42 |
-
model_names: List[str]
|
43 |
-
speed: int
|
44 |
-
queue_length: int
|
45 |
-
check_heart_beat: bool
|
46 |
-
last_heart_beat: str
|
47 |
-
|
48 |
-
|
49 |
-
def heart_beat_controller(controller):
|
50 |
-
while True:
|
51 |
-
time.sleep(CONTROLLER_HEART_BEAT_EXPIRATION)
|
52 |
-
controller.remove_stable_workers_by_expiration()
|
53 |
-
|
54 |
-
|
55 |
-
class Controller:
|
56 |
-
def __init__(self, dispatch_method: str):
|
57 |
-
# Dict[str -> WorkerInfo]
|
58 |
-
self.worker_info = {}
|
59 |
-
self.dispatch_method = DispatchMethod.from_str(dispatch_method)
|
60 |
-
|
61 |
-
self.heart_beat_thread = threading.Thread(
|
62 |
-
target=heart_beat_controller, args=(self,))
|
63 |
-
self.heart_beat_thread.start()
|
64 |
-
|
65 |
-
logger.info("Init controller")
|
66 |
-
|
67 |
-
def register_worker(self, worker_name: str, check_heart_beat: bool,
|
68 |
-
worker_status: dict):
|
69 |
-
if worker_name not in self.worker_info:
|
70 |
-
logger.info(f"Register a new worker: {worker_name}")
|
71 |
-
else:
|
72 |
-
logger.info(f"Register an existing worker: {worker_name}")
|
73 |
-
|
74 |
-
if not worker_status:
|
75 |
-
worker_status = self.get_worker_status(worker_name)
|
76 |
-
if not worker_status:
|
77 |
-
return False
|
78 |
-
|
79 |
-
self.worker_info[worker_name] = WorkerInfo(
|
80 |
-
worker_status["model_names"], worker_status["speed"], worker_status["queue_length"],
|
81 |
-
check_heart_beat, time.time())
|
82 |
-
|
83 |
-
logger.info(f"Register done: {worker_name}, {worker_status}")
|
84 |
-
return True
|
85 |
-
|
86 |
-
def get_worker_status(self, worker_name: str):
|
87 |
-
try:
|
88 |
-
r = requests.post(worker_name + "/worker_get_status", timeout=5)
|
89 |
-
except requests.exceptions.RequestException as e:
|
90 |
-
logger.error(f"Get status fails: {worker_name}, {e}")
|
91 |
-
return None
|
92 |
-
|
93 |
-
if r.status_code != 200:
|
94 |
-
logger.error(f"Get status fails: {worker_name}, {r}")
|
95 |
-
return None
|
96 |
-
|
97 |
-
return r.json()
|
98 |
-
|
99 |
-
def remove_worker(self, worker_name: str):
|
100 |
-
del self.worker_info[worker_name]
|
101 |
-
|
102 |
-
def refresh_all_workers(self):
|
103 |
-
old_info = dict(self.worker_info)
|
104 |
-
self.worker_info = {}
|
105 |
-
|
106 |
-
for w_name, w_info in old_info.items():
|
107 |
-
if not self.register_worker(w_name, w_info.check_heart_beat, None):
|
108 |
-
logger.info(f"Remove stale worker: {w_name}")
|
109 |
-
|
110 |
-
def list_models(self):
|
111 |
-
model_names = set()
|
112 |
-
|
113 |
-
for w_name, w_info in self.worker_info.items():
|
114 |
-
model_names.update(w_info.model_names)
|
115 |
-
|
116 |
-
return list(model_names)
|
117 |
-
|
118 |
-
def get_worker_address(self, model_name: str):
|
119 |
-
if self.dispatch_method == DispatchMethod.LOTTERY:
|
120 |
-
worker_names = []
|
121 |
-
worker_speeds = []
|
122 |
-
for w_name, w_info in self.worker_info.items():
|
123 |
-
if model_name in w_info.model_names:
|
124 |
-
worker_names.append(w_name)
|
125 |
-
worker_speeds.append(w_info.speed)
|
126 |
-
worker_speeds = np.array(worker_speeds, dtype=np.float32)
|
127 |
-
norm = np.sum(worker_speeds)
|
128 |
-
if norm < 1e-4:
|
129 |
-
return ""
|
130 |
-
worker_speeds = worker_speeds / norm
|
131 |
-
if True: # Directly return address
|
132 |
-
pt = np.random.choice(np.arange(len(worker_names)),
|
133 |
-
p=worker_speeds)
|
134 |
-
worker_name = worker_names[pt]
|
135 |
-
return worker_name
|
136 |
-
|
137 |
-
# Check status before returning
|
138 |
-
while True:
|
139 |
-
pt = np.random.choice(np.arange(len(worker_names)),
|
140 |
-
p=worker_speeds)
|
141 |
-
worker_name = worker_names[pt]
|
142 |
-
|
143 |
-
if self.get_worker_status(worker_name):
|
144 |
-
break
|
145 |
-
else:
|
146 |
-
self.remove_worker(worker_name)
|
147 |
-
worker_speeds[pt] = 0
|
148 |
-
norm = np.sum(worker_speeds)
|
149 |
-
if norm < 1e-4:
|
150 |
-
return ""
|
151 |
-
worker_speeds = worker_speeds / norm
|
152 |
-
continue
|
153 |
-
return worker_name
|
154 |
-
elif self.dispatch_method == DispatchMethod.SHORTEST_QUEUE:
|
155 |
-
worker_names = []
|
156 |
-
worker_qlen = []
|
157 |
-
for w_name, w_info in self.worker_info.items():
|
158 |
-
if model_name in w_info.model_names:
|
159 |
-
worker_names.append(w_name)
|
160 |
-
worker_qlen.append(w_info.queue_length / w_info.speed)
|
161 |
-
if len(worker_names) == 0:
|
162 |
-
return ""
|
163 |
-
min_index = np.argmin(worker_qlen)
|
164 |
-
w_name = worker_names[min_index]
|
165 |
-
self.worker_info[w_name].queue_length += 1
|
166 |
-
logger.info(f"names: {worker_names}, queue_lens: {worker_qlen}, ret: {w_name}")
|
167 |
-
return w_name
|
168 |
-
else:
|
169 |
-
raise ValueError(f"Invalid dispatch method: {self.dispatch_method}")
|
170 |
-
|
171 |
-
def receive_heart_beat(self, worker_name: str, queue_length: int):
|
172 |
-
if worker_name not in self.worker_info:
|
173 |
-
logger.info(f"Receive unknown heart beat. {worker_name}")
|
174 |
-
return False
|
175 |
-
|
176 |
-
self.worker_info[worker_name].queue_length = queue_length
|
177 |
-
self.worker_info[worker_name].last_heart_beat = time.time()
|
178 |
-
logger.info(f"Receive heart beat. {worker_name}")
|
179 |
-
return True
|
180 |
-
|
181 |
-
def remove_stable_workers_by_expiration(self):
|
182 |
-
expire = time.time() - CONTROLLER_HEART_BEAT_EXPIRATION
|
183 |
-
to_delete = []
|
184 |
-
for worker_name, w_info in self.worker_info.items():
|
185 |
-
if w_info.check_heart_beat and w_info.last_heart_beat < expire:
|
186 |
-
to_delete.append(worker_name)
|
187 |
-
|
188 |
-
for worker_name in to_delete:
|
189 |
-
self.remove_worker(worker_name)
|
190 |
-
|
191 |
-
def worker_api_generate_stream(self, params):
|
192 |
-
worker_addr = self.get_worker_address(params["model"])
|
193 |
-
if not worker_addr:
|
194 |
-
logger.info(f"no worker: {params['model']}")
|
195 |
-
ret = {
|
196 |
-
"text": server_error_msg,
|
197 |
-
"error_code": 2,
|
198 |
-
}
|
199 |
-
yield json.dumps(ret).encode() + b"\0"
|
200 |
-
|
201 |
-
try:
|
202 |
-
response = requests.post(worker_addr + "/worker_generate_stream",
|
203 |
-
json=params, stream=True, timeout=5)
|
204 |
-
for chunk in response.iter_lines(decode_unicode=False, delimiter=b"\0"):
|
205 |
-
if chunk:
|
206 |
-
yield chunk + b"\0"
|
207 |
-
except requests.exceptions.RequestException as e:
|
208 |
-
logger.info(f"worker timeout: {worker_addr}")
|
209 |
-
ret = {
|
210 |
-
"text": server_error_msg,
|
211 |
-
"error_code": 3,
|
212 |
-
}
|
213 |
-
yield json.dumps(ret).encode() + b"\0"
|
214 |
-
|
215 |
-
|
216 |
-
# Let the controller act as a worker to achieve hierarchical
|
217 |
-
# management. This can be used to connect isolated sub networks.
|
218 |
-
def worker_api_get_status(self):
|
219 |
-
model_names = set()
|
220 |
-
speed = 0
|
221 |
-
queue_length = 0
|
222 |
-
|
223 |
-
for w_name in self.worker_info:
|
224 |
-
worker_status = self.get_worker_status(w_name)
|
225 |
-
if worker_status is not None:
|
226 |
-
model_names.update(worker_status["model_names"])
|
227 |
-
speed += worker_status["speed"]
|
228 |
-
queue_length += worker_status["queue_length"]
|
229 |
-
|
230 |
-
return {
|
231 |
-
"model_names": list(model_names),
|
232 |
-
"speed": speed,
|
233 |
-
"queue_length": queue_length,
|
234 |
-
}
|
235 |
-
|
236 |
-
|
237 |
-
app = FastAPI()
|
238 |
-
|
239 |
-
|
240 |
-
@app.post("/register_worker")
|
241 |
-
async def register_worker(request: Request):
|
242 |
-
data = await request.json()
|
243 |
-
controller.register_worker(
|
244 |
-
data["worker_name"], data["check_heart_beat"],
|
245 |
-
data.get("worker_status", None))
|
246 |
-
|
247 |
-
|
248 |
-
@app.post("/refresh_all_workers")
|
249 |
-
async def refresh_all_workers():
|
250 |
-
models = controller.refresh_all_workers()
|
251 |
-
|
252 |
-
|
253 |
-
@app.post("/list_models")
|
254 |
-
async def list_models():
|
255 |
-
models = controller.list_models()
|
256 |
-
return {"models": models}
|
257 |
-
|
258 |
-
|
259 |
-
@app.post("/get_worker_address")
|
260 |
-
async def get_worker_address(request: Request):
|
261 |
-
data = await request.json()
|
262 |
-
addr = controller.get_worker_address(data["model"])
|
263 |
-
return {"address": addr}
|
264 |
-
|
265 |
-
|
266 |
-
@app.post("/receive_heart_beat")
|
267 |
-
async def receive_heart_beat(request: Request):
|
268 |
-
data = await request.json()
|
269 |
-
exist = controller.receive_heart_beat(
|
270 |
-
data["worker_name"], data["queue_length"])
|
271 |
-
return {"exist": exist}
|
272 |
-
|
273 |
-
|
274 |
-
@app.post("/worker_generate_stream")
|
275 |
-
async def worker_api_generate_stream(request: Request):
|
276 |
-
params = await request.json()
|
277 |
-
generator = controller.worker_api_generate_stream(params)
|
278 |
-
return StreamingResponse(generator)
|
279 |
-
|
280 |
-
|
281 |
-
@app.post("/worker_get_status")
|
282 |
-
async def worker_api_get_status(request: Request):
|
283 |
-
return controller.worker_api_get_status()
|
284 |
-
|
285 |
-
|
286 |
-
if __name__ == "__main__":
|
287 |
-
parser = argparse.ArgumentParser()
|
288 |
-
parser.add_argument("--host", type=str, default="localhost")
|
289 |
-
parser.add_argument("--port", type=int, default=21001)
|
290 |
-
parser.add_argument("--dispatch-method", type=str, choices=[
|
291 |
-
"lottery", "shortest_queue"], default="shortest_queue")
|
292 |
-
args = parser.parse_args()
|
293 |
-
logger.info(f"args: {args}")
|
294 |
-
|
295 |
-
controller = Controller(args.dispatch_method)
|
296 |
-
uvicorn.run(app, host=args.host, port=args.port, log_level="info")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
LLAVA_Biovil/llava/serve/examples/extreme_ironing.jpg
DELETED
Binary file (62.6 kB)
|
|
LLAVA_Biovil/llava/serve/examples/waterview.jpg
DELETED
Binary file (95.5 kB)
|
|
LLAVA_Biovil/llava/serve/gradio_web_server.py
DELETED
@@ -1,470 +0,0 @@
|
|
1 |
-
import argparse
|
2 |
-
import datetime
|
3 |
-
import json
|
4 |
-
import os
|
5 |
-
import time
|
6 |
-
|
7 |
-
import gradio as gr
|
8 |
-
import requests
|
9 |
-
|
10 |
-
from llava.conversation import (default_conversation, conv_templates,
|
11 |
-
SeparatorStyle)
|
12 |
-
from llava.constants import LOGDIR
|
13 |
-
from llava.utils import (build_logger, server_error_msg,
|
14 |
-
violates_moderation, moderation_msg)
|
15 |
-
import hashlib
|
16 |
-
|
17 |
-
|
18 |
-
logger = build_logger("gradio_web_server", "gradio_web_server.log")
|
19 |
-
|
20 |
-
headers = {"User-Agent": "LLaVA Client"}
|
21 |
-
|
22 |
-
no_change_btn = gr.Button.update()
|
23 |
-
enable_btn = gr.Button.update(interactive=True)
|
24 |
-
disable_btn = gr.Button.update(interactive=False)
|
25 |
-
|
26 |
-
priority = {
|
27 |
-
"vicuna-13b": "aaaaaaa",
|
28 |
-
"koala-13b": "aaaaaab",
|
29 |
-
}
|
30 |
-
|
31 |
-
|
32 |
-
def get_conv_log_filename():
|
33 |
-
t = datetime.datetime.now()
|
34 |
-
name = os.path.join(LOGDIR, f"{t.year}-{t.month:02d}-{t.day:02d}-conv.json")
|
35 |
-
return name
|
36 |
-
|
37 |
-
|
38 |
-
def get_model_list():
|
39 |
-
ret = requests.post(args.controller_url + "/refresh_all_workers")
|
40 |
-
assert ret.status_code == 200
|
41 |
-
ret = requests.post(args.controller_url + "/list_models")
|
42 |
-
models = ret.json()["models"]
|
43 |
-
models.sort(key=lambda x: priority.get(x, x))
|
44 |
-
logger.info(f"Models: {models}")
|
45 |
-
return models
|
46 |
-
|
47 |
-
|
48 |
-
get_window_url_params = """
|
49 |
-
function() {
|
50 |
-
const params = new URLSearchParams(window.location.search);
|
51 |
-
url_params = Object.fromEntries(params);
|
52 |
-
console.log(url_params);
|
53 |
-
return url_params;
|
54 |
-
}
|
55 |
-
"""
|
56 |
-
|
57 |
-
|
58 |
-
def load_demo(url_params, request: gr.Request):
|
59 |
-
logger.info(f"load_demo. ip: {request.client.host}. params: {url_params}")
|
60 |
-
|
61 |
-
dropdown_update = gr.Dropdown.update(visible=True)
|
62 |
-
if "model" in url_params:
|
63 |
-
model = url_params["model"]
|
64 |
-
if model in models:
|
65 |
-
dropdown_update = gr.Dropdown.update(
|
66 |
-
value=model, visible=True)
|
67 |
-
|
68 |
-
state = default_conversation.copy()
|
69 |
-
return state, dropdown_update
|
70 |
-
|
71 |
-
|
72 |
-
def load_demo_refresh_model_list(request: gr.Request):
|
73 |
-
logger.info(f"load_demo. ip: {request.client.host}")
|
74 |
-
models = get_model_list()
|
75 |
-
state = default_conversation.copy()
|
76 |
-
dropdown_update = gr.Dropdown.update(
|
77 |
-
choices=models,
|
78 |
-
value=models[0] if len(models) > 0 else ""
|
79 |
-
)
|
80 |
-
return state, dropdown_update
|
81 |
-
|
82 |
-
|
83 |
-
def vote_last_response(state, vote_type, model_selector, request: gr.Request):
|
84 |
-
with open(get_conv_log_filename(), "a") as fout:
|
85 |
-
data = {
|
86 |
-
"tstamp": round(time.time(), 4),
|
87 |
-
"type": vote_type,
|
88 |
-
"model": model_selector,
|
89 |
-
"state": state.dict(),
|
90 |
-
"ip": request.client.host,
|
91 |
-
}
|
92 |
-
fout.write(json.dumps(data) + "\n")
|
93 |
-
|
94 |
-
|
95 |
-
def upvote_last_response(state, model_selector, request: gr.Request):
|
96 |
-
logger.info(f"upvote. ip: {request.client.host}")
|
97 |
-
vote_last_response(state, "upvote", model_selector, request)
|
98 |
-
return ("",) + (disable_btn,) * 3
|
99 |
-
|
100 |
-
|
101 |
-
def downvote_last_response(state, model_selector, request: gr.Request):
|
102 |
-
logger.info(f"downvote. ip: {request.client.host}")
|
103 |
-
vote_last_response(state, "downvote", model_selector, request)
|
104 |
-
return ("",) + (disable_btn,) * 3
|
105 |
-
|
106 |
-
|
107 |
-
def flag_last_response(state, model_selector, request: gr.Request):
|
108 |
-
logger.info(f"flag. ip: {request.client.host}")
|
109 |
-
vote_last_response(state, "flag", model_selector, request)
|
110 |
-
return ("",) + (disable_btn,) * 3
|
111 |
-
|
112 |
-
|
113 |
-
def regenerate(state, image_process_mode, request: gr.Request):
|
114 |
-
logger.info(f"regenerate. ip: {request.client.host}")
|
115 |
-
state.messages[-1][-1] = None
|
116 |
-
prev_human_msg = state.messages[-2]
|
117 |
-
if type(prev_human_msg[1]) in (tuple, list):
|
118 |
-
prev_human_msg[1] = (*prev_human_msg[1][:2], image_process_mode)
|
119 |
-
state.skip_next = False
|
120 |
-
return (state, state.to_gradio_chatbot(), "", None) + (disable_btn,) * 5
|
121 |
-
|
122 |
-
|
123 |
-
def clear_history(request: gr.Request):
|
124 |
-
logger.info(f"clear_history. ip: {request.client.host}")
|
125 |
-
state = default_conversation.copy()
|
126 |
-
return (state, state.to_gradio_chatbot(), "", None) + (disable_btn,) * 5
|
127 |
-
|
128 |
-
|
129 |
-
def add_text(state, text, image, image_process_mode, request: gr.Request):
|
130 |
-
logger.info(f"add_text. ip: {request.client.host}. len: {len(text)}")
|
131 |
-
if len(text) <= 0 and image is None:
|
132 |
-
state.skip_next = True
|
133 |
-
return (state, state.to_gradio_chatbot(), "", None) + (no_change_btn,) * 5
|
134 |
-
if args.moderate:
|
135 |
-
flagged = violates_moderation(text)
|
136 |
-
if flagged:
|
137 |
-
state.skip_next = True
|
138 |
-
return (state, state.to_gradio_chatbot(), moderation_msg, None) + (
|
139 |
-
no_change_btn,) * 5
|
140 |
-
|
141 |
-
text = text[:1536] # Hard cut-off
|
142 |
-
if image is not None:
|
143 |
-
text = text[:1200] # Hard cut-off for images
|
144 |
-
if '<image>' not in text:
|
145 |
-
# text = '<Image><image></Image>' + text
|
146 |
-
text = text + '\n<image>'
|
147 |
-
text = (text, image, image_process_mode)
|
148 |
-
if len(state.get_images(return_pil=True)) > 0:
|
149 |
-
state = default_conversation.copy()
|
150 |
-
state.append_message(state.roles[0], text)
|
151 |
-
state.append_message(state.roles[1], None)
|
152 |
-
state.skip_next = False
|
153 |
-
return (state, state.to_gradio_chatbot(), "", None) + (disable_btn,) * 5
|
154 |
-
|
155 |
-
|
156 |
-
def http_bot(state, model_selector, temperature, top_p, max_new_tokens, request: gr.Request):
|
157 |
-
logger.info(f"http_bot. ip: {request.client.host}")
|
158 |
-
start_tstamp = time.time()
|
159 |
-
model_name = model_selector
|
160 |
-
|
161 |
-
if state.skip_next:
|
162 |
-
# This generate call is skipped due to invalid inputs
|
163 |
-
yield (state, state.to_gradio_chatbot()) + (no_change_btn,) * 5
|
164 |
-
return
|
165 |
-
|
166 |
-
if len(state.messages) == state.offset + 2:
|
167 |
-
# First round of conversation
|
168 |
-
if "llava" in model_name.lower():
|
169 |
-
if 'llama-2' in model_name.lower():
|
170 |
-
template_name = "llava_llama_2"
|
171 |
-
elif "v1" in model_name.lower():
|
172 |
-
if 'mmtag' in model_name.lower():
|
173 |
-
template_name = "v1_mmtag"
|
174 |
-
elif 'plain' in model_name.lower() and 'finetune' not in model_name.lower():
|
175 |
-
template_name = "v1_mmtag"
|
176 |
-
else:
|
177 |
-
template_name = "llava_v1"
|
178 |
-
elif "mpt" in model_name.lower():
|
179 |
-
template_name = "mpt"
|
180 |
-
else:
|
181 |
-
if 'mmtag' in model_name.lower():
|
182 |
-
template_name = "v0_mmtag"
|
183 |
-
elif 'plain' in model_name.lower() and 'finetune' not in model_name.lower():
|
184 |
-
template_name = "v0_mmtag"
|
185 |
-
else:
|
186 |
-
template_name = "llava_v0"
|
187 |
-
elif "mpt" in model_name:
|
188 |
-
template_name = "mpt_text"
|
189 |
-
elif "llama-2" in model_name:
|
190 |
-
template_name = "llama_2"
|
191 |
-
else:
|
192 |
-
template_name = "vicuna_v1"
|
193 |
-
new_state = conv_templates[template_name].copy()
|
194 |
-
new_state.append_message(new_state.roles[0], state.messages[-2][1])
|
195 |
-
new_state.append_message(new_state.roles[1], None)
|
196 |
-
state = new_state
|
197 |
-
|
198 |
-
# Query worker address
|
199 |
-
controller_url = args.controller_url
|
200 |
-
ret = requests.post(controller_url + "/get_worker_address",
|
201 |
-
json={"model": model_name})
|
202 |
-
worker_addr = ret.json()["address"]
|
203 |
-
logger.info(f"model_name: {model_name}, worker_addr: {worker_addr}")
|
204 |
-
|
205 |
-
# No available worker
|
206 |
-
if worker_addr == "":
|
207 |
-
state.messages[-1][-1] = server_error_msg
|
208 |
-
yield (state, state.to_gradio_chatbot(), disable_btn, disable_btn, disable_btn, enable_btn, enable_btn)
|
209 |
-
return
|
210 |
-
|
211 |
-
# Construct prompt
|
212 |
-
prompt = state.get_prompt()
|
213 |
-
|
214 |
-
all_images = state.get_images(return_pil=True)
|
215 |
-
all_image_hash = [hashlib.md5(image.tobytes()).hexdigest() for image in all_images]
|
216 |
-
for image, hash in zip(all_images, all_image_hash):
|
217 |
-
t = datetime.datetime.now()
|
218 |
-
filename = os.path.join(LOGDIR, "serve_images", f"{t.year}-{t.month:02d}-{t.day:02d}", f"{hash}.jpg")
|
219 |
-
if not os.path.isfile(filename):
|
220 |
-
os.makedirs(os.path.dirname(filename), exist_ok=True)
|
221 |
-
image.save(filename)
|
222 |
-
|
223 |
-
# Make requests
|
224 |
-
pload = {
|
225 |
-
"model": model_name,
|
226 |
-
"prompt": prompt,
|
227 |
-
"temperature": float(temperature),
|
228 |
-
"top_p": float(top_p),
|
229 |
-
"max_new_tokens": min(int(max_new_tokens), 1536),
|
230 |
-
"stop": state.sep if state.sep_style in [SeparatorStyle.SINGLE, SeparatorStyle.MPT] else state.sep2,
|
231 |
-
"images": f'List of {len(state.get_images())} images: {all_image_hash}',
|
232 |
-
}
|
233 |
-
logger.info(f"==== request ====\n{pload}")
|
234 |
-
|
235 |
-
pload['images'] = state.get_images()
|
236 |
-
|
237 |
-
state.messages[-1][-1] = "▌"
|
238 |
-
yield (state, state.to_gradio_chatbot()) + (disable_btn,) * 5
|
239 |
-
|
240 |
-
try:
|
241 |
-
# Stream output
|
242 |
-
response = requests.post(worker_addr + "/worker_generate_stream",
|
243 |
-
headers=headers, json=pload, stream=True, timeout=10)
|
244 |
-
for chunk in response.iter_lines(decode_unicode=False, delimiter=b"\0"):
|
245 |
-
if chunk:
|
246 |
-
data = json.loads(chunk.decode())
|
247 |
-
if data["error_code"] == 0:
|
248 |
-
output = data["text"][len(prompt):].strip()
|
249 |
-
state.messages[-1][-1] = output + "▌"
|
250 |
-
yield (state, state.to_gradio_chatbot()) + (disable_btn,) * 5
|
251 |
-
else:
|
252 |
-
output = data["text"] + f" (error_code: {data['error_code']})"
|
253 |
-
state.messages[-1][-1] = output
|
254 |
-
yield (state, state.to_gradio_chatbot()) + (disable_btn, disable_btn, disable_btn, enable_btn, enable_btn)
|
255 |
-
return
|
256 |
-
time.sleep(0.03)
|
257 |
-
except requests.exceptions.RequestException as e:
|
258 |
-
state.messages[-1][-1] = server_error_msg
|
259 |
-
yield (state, state.to_gradio_chatbot()) + (disable_btn, disable_btn, disable_btn, enable_btn, enable_btn)
|
260 |
-
return
|
261 |
-
|
262 |
-
state.messages[-1][-1] = state.messages[-1][-1][:-1]
|
263 |
-
yield (state, state.to_gradio_chatbot()) + (enable_btn,) * 5
|
264 |
-
|
265 |
-
finish_tstamp = time.time()
|
266 |
-
logger.info(f"{output}")
|
267 |
-
|
268 |
-
with open(get_conv_log_filename(), "a") as fout:
|
269 |
-
data = {
|
270 |
-
"tstamp": round(finish_tstamp, 4),
|
271 |
-
"type": "chat",
|
272 |
-
"model": model_name,
|
273 |
-
"start": round(start_tstamp, 4),
|
274 |
-
"finish": round(finish_tstamp, 4),
|
275 |
-
"state": state.dict(),
|
276 |
-
"images": all_image_hash,
|
277 |
-
"ip": request.client.host,
|
278 |
-
}
|
279 |
-
fout.write(json.dumps(data) + "\n")
|
280 |
-
|
281 |
-
title_markdown = ("""
|
282 |
-
# 🌋 LLaVA: Large Language and Vision Assistant
|
283 |
-
[[Project Page](https://llava-vl.github.io)] [[Code](https://github.com/haotian-liu/LLaVA)] [[Model](https://github.com/haotian-liu/LLaVA/blob/main/docs/MODEL_ZOO.md)] | 📚 [[LLaVA](https://arxiv.org/abs/2304.08485)] [[LLaVA-v1.5](https://arxiv.org/abs/2310.03744)]
|
284 |
-
""")
|
285 |
-
|
286 |
-
tos_markdown = ("""
|
287 |
-
### Terms of use
|
288 |
-
By using this service, users are required to agree to the following terms:
|
289 |
-
The service is a research preview intended for non-commercial use only. It only provides limited safety measures and may generate offensive content. It must not be used for any illegal, harmful, violent, racist, or sexual purposes. The service may collect user dialogue data for future research.
|
290 |
-
Please click the "Flag" button if you get any inappropriate answer! We will collect those to keep improving our moderator.
|
291 |
-
For an optimal experience, please use desktop computers for this demo, as mobile devices may compromise its quality.
|
292 |
-
""")
|
293 |
-
|
294 |
-
|
295 |
-
learn_more_markdown = ("""
|
296 |
-
### License
|
297 |
-
The service is a research preview intended for non-commercial use only, subject to the model [License](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) of LLaMA, [Terms of Use](https://openai.com/policies/terms-of-use) of the data generated by OpenAI, and [Privacy Practices](https://chrome.google.com/webstore/detail/sharegpt-share-your-chatg/daiacboceoaocpibfodeljbdfacokfjb) of ShareGPT. Please contact us if you find any potential violation.
|
298 |
-
""")
|
299 |
-
|
300 |
-
block_css = """
|
301 |
-
|
302 |
-
#buttons button {
|
303 |
-
min-width: min(120px,100%);
|
304 |
-
}
|
305 |
-
|
306 |
-
"""
|
307 |
-
|
308 |
-
def build_demo(embed_mode):
|
309 |
-
textbox = gr.Textbox(show_label=False, placeholder="Enter text and press ENTER", container=False)
|
310 |
-
with gr.Blocks(title="LLaVA", theme=gr.themes.Default(), css=block_css) as demo:
|
311 |
-
state = gr.State()
|
312 |
-
|
313 |
-
if not embed_mode:
|
314 |
-
gr.Markdown(title_markdown)
|
315 |
-
|
316 |
-
with gr.Row():
|
317 |
-
with gr.Column(scale=3):
|
318 |
-
with gr.Row(elem_id="model_selector_row"):
|
319 |
-
model_selector = gr.Dropdown(
|
320 |
-
choices=models,
|
321 |
-
value=models[0] if len(models) > 0 else "",
|
322 |
-
interactive=True,
|
323 |
-
show_label=False,
|
324 |
-
container=False)
|
325 |
-
|
326 |
-
imagebox = gr.Image(type="pil")
|
327 |
-
image_process_mode = gr.Radio(
|
328 |
-
["Crop", "Resize", "Pad", "Default"],
|
329 |
-
value="Default",
|
330 |
-
label="Preprocess for non-square image", visible=False)
|
331 |
-
|
332 |
-
cur_dir = os.path.dirname(os.path.abspath(__file__))
|
333 |
-
gr.Examples(examples=[
|
334 |
-
[f"{cur_dir}/examples/extreme_ironing.jpg", "What is unusual about this image?"],
|
335 |
-
[f"{cur_dir}/examples/waterview.jpg", "What are the things I should be cautious about when I visit here?"],
|
336 |
-
], inputs=[imagebox, textbox])
|
337 |
-
|
338 |
-
with gr.Accordion("Parameters", open=False) as parameter_row:
|
339 |
-
temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.2, step=0.1, interactive=True, label="Temperature",)
|
340 |
-
top_p = gr.Slider(minimum=0.0, maximum=1.0, value=0.7, step=0.1, interactive=True, label="Top P",)
|
341 |
-
max_output_tokens = gr.Slider(minimum=0, maximum=1024, value=512, step=64, interactive=True, label="Max output tokens",)
|
342 |
-
|
343 |
-
with gr.Column(scale=8):
|
344 |
-
chatbot = gr.Chatbot(elem_id="chatbot", label="LLaVA Chatbot", height=550)
|
345 |
-
with gr.Row():
|
346 |
-
with gr.Column(scale=8):
|
347 |
-
textbox.render()
|
348 |
-
with gr.Column(scale=1, min_width=50):
|
349 |
-
submit_btn = gr.Button(value="Send", variant="primary")
|
350 |
-
with gr.Row(elem_id="buttons") as button_row:
|
351 |
-
upvote_btn = gr.Button(value="👍 Upvote", interactive=False)
|
352 |
-
downvote_btn = gr.Button(value="👎 Downvote", interactive=False)
|
353 |
-
flag_btn = gr.Button(value="⚠️ Flag", interactive=False)
|
354 |
-
#stop_btn = gr.Button(value="⏹️ Stop Generation", interactive=False)
|
355 |
-
regenerate_btn = gr.Button(value="🔄 Regenerate", interactive=False)
|
356 |
-
clear_btn = gr.Button(value="🗑️ Clear", interactive=False)
|
357 |
-
|
358 |
-
if not embed_mode:
|
359 |
-
gr.Markdown(tos_markdown)
|
360 |
-
gr.Markdown(learn_more_markdown)
|
361 |
-
url_params = gr.JSON(visible=False)
|
362 |
-
|
363 |
-
# Register listeners
|
364 |
-
btn_list = [upvote_btn, downvote_btn, flag_btn, regenerate_btn, clear_btn]
|
365 |
-
upvote_btn.click(
|
366 |
-
upvote_last_response,
|
367 |
-
[state, model_selector],
|
368 |
-
[textbox, upvote_btn, downvote_btn, flag_btn],
|
369 |
-
queue=False
|
370 |
-
)
|
371 |
-
downvote_btn.click(
|
372 |
-
downvote_last_response,
|
373 |
-
[state, model_selector],
|
374 |
-
[textbox, upvote_btn, downvote_btn, flag_btn],
|
375 |
-
queue=False
|
376 |
-
)
|
377 |
-
flag_btn.click(
|
378 |
-
flag_last_response,
|
379 |
-
[state, model_selector],
|
380 |
-
[textbox, upvote_btn, downvote_btn, flag_btn],
|
381 |
-
queue=False
|
382 |
-
)
|
383 |
-
|
384 |
-
regenerate_btn.click(
|
385 |
-
regenerate,
|
386 |
-
[state, image_process_mode],
|
387 |
-
[state, chatbot, textbox, imagebox] + btn_list,
|
388 |
-
queue=False
|
389 |
-
).then(
|
390 |
-
http_bot,
|
391 |
-
[state, model_selector, temperature, top_p, max_output_tokens],
|
392 |
-
[state, chatbot] + btn_list
|
393 |
-
)
|
394 |
-
|
395 |
-
clear_btn.click(
|
396 |
-
clear_history,
|
397 |
-
None,
|
398 |
-
[state, chatbot, textbox, imagebox] + btn_list,
|
399 |
-
queue=False
|
400 |
-
)
|
401 |
-
|
402 |
-
textbox.submit(
|
403 |
-
add_text,
|
404 |
-
[state, textbox, imagebox, image_process_mode],
|
405 |
-
[state, chatbot, textbox, imagebox] + btn_list,
|
406 |
-
queue=False
|
407 |
-
).then(
|
408 |
-
http_bot,
|
409 |
-
[state, model_selector, temperature, top_p, max_output_tokens],
|
410 |
-
[state, chatbot] + btn_list
|
411 |
-
)
|
412 |
-
|
413 |
-
submit_btn.click(
|
414 |
-
add_text,
|
415 |
-
[state, textbox, imagebox, image_process_mode],
|
416 |
-
[state, chatbot, textbox, imagebox] + btn_list,
|
417 |
-
queue=False
|
418 |
-
).then(
|
419 |
-
http_bot,
|
420 |
-
[state, model_selector, temperature, top_p, max_output_tokens],
|
421 |
-
[state, chatbot] + btn_list
|
422 |
-
)
|
423 |
-
|
424 |
-
if args.model_list_mode == "once":
|
425 |
-
demo.load(
|
426 |
-
load_demo,
|
427 |
-
[url_params],
|
428 |
-
[state, model_selector],
|
429 |
-
_js=get_window_url_params,
|
430 |
-
queue=False
|
431 |
-
)
|
432 |
-
elif args.model_list_mode == "reload":
|
433 |
-
demo.load(
|
434 |
-
load_demo_refresh_model_list,
|
435 |
-
None,
|
436 |
-
[state, model_selector],
|
437 |
-
queue=False
|
438 |
-
)
|
439 |
-
else:
|
440 |
-
raise ValueError(f"Unknown model list mode: {args.model_list_mode}")
|
441 |
-
|
442 |
-
return demo
|
443 |
-
|
444 |
-
|
445 |
-
if __name__ == "__main__":
|
446 |
-
parser = argparse.ArgumentParser()
|
447 |
-
parser.add_argument("--host", type=str, default="0.0.0.0")
|
448 |
-
parser.add_argument("--port", type=int)
|
449 |
-
parser.add_argument("--controller-url", type=str, default="http://localhost:21001")
|
450 |
-
parser.add_argument("--concurrency-count", type=int, default=10)
|
451 |
-
parser.add_argument("--model-list-mode", type=str, default="once",
|
452 |
-
choices=["once", "reload"])
|
453 |
-
parser.add_argument("--share", action="store_true")
|
454 |
-
parser.add_argument("--moderate", action="store_true")
|
455 |
-
parser.add_argument("--embed", action="store_true")
|
456 |
-
args = parser.parse_args()
|
457 |
-
logger.info(f"args: {args}")
|
458 |
-
|
459 |
-
models = get_model_list()
|
460 |
-
|
461 |
-
logger.info(args)
|
462 |
-
demo = build_demo(args.embed)
|
463 |
-
demo.queue(
|
464 |
-
concurrency_count=args.concurrency_count,
|
465 |
-
api_open=False
|
466 |
-
).launch(
|
467 |
-
server_name=args.host,
|
468 |
-
server_port=args.port,
|
469 |
-
share=args.share
|
470 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
LLAVA_Biovil/llava/serve/model_worker.py
DELETED
@@ -1,310 +0,0 @@
|
|
1 |
-
"""
|
2 |
-
A model worker executes the model.
|
3 |
-
"""
|
4 |
-
import argparse
|
5 |
-
import asyncio
|
6 |
-
import json
|
7 |
-
import time
|
8 |
-
import threading
|
9 |
-
import uuid
|
10 |
-
|
11 |
-
from fastapi import FastAPI, Request, BackgroundTasks
|
12 |
-
from fastapi.responses import StreamingResponse
|
13 |
-
import requests
|
14 |
-
import torch
|
15 |
-
import uvicorn
|
16 |
-
from functools import partial
|
17 |
-
|
18 |
-
from llava.constants import WORKER_HEART_BEAT_INTERVAL
|
19 |
-
from llava.utils import (build_logger, server_error_msg,
|
20 |
-
pretty_print_semaphore)
|
21 |
-
from llava.model.builder import load_pretrained_model
|
22 |
-
from llava.mm_utils import process_images, load_image_from_base64, tokenizer_image_token, KeywordsStoppingCriteria, process_image_biovil, \
|
23 |
-
load_image_from_base64_biovil
|
24 |
-
from llava.constants import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN, DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN
|
25 |
-
from transformers import TextIteratorStreamer
|
26 |
-
from threading import Thread
|
27 |
-
|
28 |
-
from torchvision.transforms import Compose, Resize, ToTensor, CenterCrop, transforms
|
29 |
-
|
30 |
-
from test import ExpandChannels
|
31 |
-
|
32 |
-
GB = 1 << 30
|
33 |
-
|
34 |
-
worker_id = str(uuid.uuid4())[:6]
|
35 |
-
logger = build_logger("model_worker", f"model_worker_{worker_id}.log")
|
36 |
-
global_counter = 0
|
37 |
-
|
38 |
-
model_semaphore = None
|
39 |
-
|
40 |
-
|
41 |
-
def heart_beat_worker(controller):
|
42 |
-
|
43 |
-
while True:
|
44 |
-
time.sleep(WORKER_HEART_BEAT_INTERVAL)
|
45 |
-
controller.send_heart_beat()
|
46 |
-
|
47 |
-
|
48 |
-
class ModelWorker:
|
49 |
-
def __init__(self, controller_addr, worker_addr,
|
50 |
-
worker_id, no_register,
|
51 |
-
model_path, model_base, model_name,
|
52 |
-
load_8bit, load_4bit, device, vision_tower):
|
53 |
-
self.controller_addr = controller_addr
|
54 |
-
self.worker_addr = worker_addr
|
55 |
-
self.worker_id = worker_id
|
56 |
-
if model_path.endswith("/"):
|
57 |
-
model_path = model_path[:-1]
|
58 |
-
if model_name is None:
|
59 |
-
model_paths = model_path.split("/")
|
60 |
-
if model_paths[-1].startswith('checkpoint-'):
|
61 |
-
self.model_name = model_paths[-2] + "_" + model_paths[-1]
|
62 |
-
else:
|
63 |
-
self.model_name = model_paths[-1]
|
64 |
-
else:
|
65 |
-
self.model_name = model_name
|
66 |
-
|
67 |
-
self.device = device
|
68 |
-
logger.info(f"Loading the model {self.model_name} on worker {worker_id} ...")
|
69 |
-
self.tokenizer, self.model, self.image_processor, self.context_len = load_pretrained_model(
|
70 |
-
model_path, model_base, self.model_name, load_8bit, load_4bit, device=self.device)
|
71 |
-
self.is_multimodal = 'llava' in self.model_name.lower()
|
72 |
-
|
73 |
-
if not no_register:
|
74 |
-
self.register_to_controller()
|
75 |
-
self.heart_beat_thread = threading.Thread(
|
76 |
-
target=heart_beat_worker, args=(self,))
|
77 |
-
self.heart_beat_thread.start()
|
78 |
-
|
79 |
-
self.vision_tower = vision_tower
|
80 |
-
self.vis_transforms_biovil = self.create_chest_xray_transform_for_inference(512, center_crop_size=448)
|
81 |
-
|
82 |
-
def create_chest_xray_transform_for_inference(self, resize: int, center_crop_size: int) -> Compose:
|
83 |
-
"""
|
84 |
-
Defines the image transformation pipeline for Chest-Xray datasets.
|
85 |
-
|
86 |
-
:param resize: The size to resize the image to. Linear resampling is used.
|
87 |
-
Resizing is applied on the axis with smaller shape.
|
88 |
-
:param center_crop_size: The size to center crop the image to. Square crop is applied.
|
89 |
-
"""
|
90 |
-
|
91 |
-
transforms = [Resize(resize), CenterCrop(center_crop_size), ToTensor(), ExpandChannels()]
|
92 |
-
return Compose(transforms)
|
93 |
-
|
94 |
-
def register_to_controller(self):
|
95 |
-
logger.info("Register to controller")
|
96 |
-
|
97 |
-
url = self.controller_addr + "/register_worker"
|
98 |
-
data = {
|
99 |
-
"worker_name": self.worker_addr,
|
100 |
-
"check_heart_beat": True,
|
101 |
-
"worker_status": self.get_status()
|
102 |
-
}
|
103 |
-
r = requests.post(url, json=data)
|
104 |
-
assert r.status_code == 200
|
105 |
-
|
106 |
-
def send_heart_beat(self):
|
107 |
-
logger.info(f"Send heart beat. Models: {[self.model_name]}. "
|
108 |
-
f"Semaphore: {pretty_print_semaphore(model_semaphore)}. "
|
109 |
-
f"global_counter: {global_counter}")
|
110 |
-
|
111 |
-
url = self.controller_addr + "/receive_heart_beat"
|
112 |
-
|
113 |
-
while True:
|
114 |
-
try:
|
115 |
-
ret = requests.post(url, json={
|
116 |
-
"worker_name": self.worker_addr,
|
117 |
-
"queue_length": self.get_queue_length()}, timeout=5)
|
118 |
-
exist = ret.json()["exist"]
|
119 |
-
break
|
120 |
-
except requests.exceptions.RequestException as e:
|
121 |
-
logger.error(f"heart beat error: {e}")
|
122 |
-
time.sleep(5)
|
123 |
-
|
124 |
-
if not exist:
|
125 |
-
self.register_to_controller()
|
126 |
-
|
127 |
-
def get_queue_length(self):
|
128 |
-
if model_semaphore is None:
|
129 |
-
return 0
|
130 |
-
else:
|
131 |
-
return args.limit_model_concurrency - model_semaphore._value + (len(
|
132 |
-
model_semaphore._waiters) if model_semaphore._waiters is not None else 0)
|
133 |
-
|
134 |
-
def get_status(self):
|
135 |
-
return {
|
136 |
-
"model_names": [self.model_name],
|
137 |
-
"speed": 1,
|
138 |
-
"queue_length": self.get_queue_length(),
|
139 |
-
}
|
140 |
-
|
141 |
-
@torch.inference_mode()
|
142 |
-
def generate_stream(self, params):
|
143 |
-
tokenizer, model, image_processor = self.tokenizer, self.model, self.image_processor
|
144 |
-
|
145 |
-
prompt = params["prompt"]
|
146 |
-
ori_prompt = prompt
|
147 |
-
images = params.get("images", None)
|
148 |
-
num_image_tokens = 0
|
149 |
-
if images is not None and len(images) > 0 and self.is_multimodal:
|
150 |
-
if len(images) > 0:
|
151 |
-
if len(images) != prompt.count(DEFAULT_IMAGE_TOKEN):
|
152 |
-
raise ValueError("Number of images does not match number of <image> tokens in prompt")
|
153 |
-
|
154 |
-
if self.vision_tower == 'biovil':
|
155 |
-
images = [load_image_from_base64_biovil(image) for image in images]
|
156 |
-
images = process_image_biovil(images, self.vis_transforms_biovil)
|
157 |
-
else:
|
158 |
-
images = [load_image_from_base64(image) for image in images]
|
159 |
-
images = process_images(images, image_processor, model.config)
|
160 |
-
|
161 |
-
if type(images) is list:
|
162 |
-
images = [image.to(self.model.device, dtype=torch.bfloat16) for image in images]
|
163 |
-
else:
|
164 |
-
images = images.to(self.model.device, dtype=torch.bfloat16)
|
165 |
-
|
166 |
-
replace_token = DEFAULT_IMAGE_TOKEN
|
167 |
-
if getattr(self.model.config, 'mm_use_im_start_end', False):
|
168 |
-
replace_token = DEFAULT_IM_START_TOKEN + replace_token + DEFAULT_IM_END_TOKEN
|
169 |
-
prompt = prompt.replace(DEFAULT_IMAGE_TOKEN, replace_token)
|
170 |
-
|
171 |
-
num_image_tokens = prompt.count(replace_token) * 196 if self.vision_tower == 'biovil' else prompt.count(replace_token) * model.get_vision_tower().num_patches
|
172 |
-
else:
|
173 |
-
images = None
|
174 |
-
image_args = {"images": images}
|
175 |
-
else:
|
176 |
-
images = None
|
177 |
-
image_args = {}
|
178 |
-
|
179 |
-
temperature = float(params.get("temperature", 1.0))
|
180 |
-
top_p = float(params.get("top_p", 1.0))
|
181 |
-
max_context_length = getattr(model.config, 'max_position_embeddings', 2048)
|
182 |
-
max_new_tokens = min(int(params.get("max_new_tokens", 256)), 1024)
|
183 |
-
stop_str = params.get("stop", None)
|
184 |
-
do_sample = True if temperature > 0.001 else False
|
185 |
-
|
186 |
-
input_ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors='pt').unsqueeze(0).to(self.device)
|
187 |
-
keywords = [stop_str]
|
188 |
-
stopping_criteria = KeywordsStoppingCriteria(keywords, tokenizer, input_ids)
|
189 |
-
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True, timeout=15)
|
190 |
-
|
191 |
-
max_new_tokens = min(max_new_tokens, max_context_length - input_ids.shape[-1] - num_image_tokens)
|
192 |
-
|
193 |
-
if max_new_tokens < 1:
|
194 |
-
yield json.dumps({"text": ori_prompt + "Exceeds max token length. Please start a new conversation, thanks.", "error_code": 0}).encode() + b"\0"
|
195 |
-
return
|
196 |
-
|
197 |
-
thread = Thread(target=model.generate, kwargs=dict(
|
198 |
-
inputs=input_ids,
|
199 |
-
do_sample=do_sample,
|
200 |
-
temperature=temperature,
|
201 |
-
top_p=top_p,
|
202 |
-
max_new_tokens=max_new_tokens,
|
203 |
-
streamer=streamer,
|
204 |
-
stopping_criteria=[stopping_criteria],
|
205 |
-
use_cache=True,
|
206 |
-
**image_args
|
207 |
-
))
|
208 |
-
thread.start()
|
209 |
-
|
210 |
-
generated_text = ori_prompt
|
211 |
-
for new_text in streamer:
|
212 |
-
generated_text += new_text
|
213 |
-
if generated_text.endswith(stop_str):
|
214 |
-
generated_text = generated_text[:-len(stop_str)]
|
215 |
-
yield json.dumps({"text": generated_text, "error_code": 0}).encode() + b"\0"
|
216 |
-
|
217 |
-
def generate_stream_gate(self, params):
|
218 |
-
try:
|
219 |
-
for x in self.generate_stream(params):
|
220 |
-
yield x
|
221 |
-
except ValueError as e:
|
222 |
-
print("Caught ValueError:", e)
|
223 |
-
ret = {
|
224 |
-
"text": server_error_msg,
|
225 |
-
"error_code": 1,
|
226 |
-
}
|
227 |
-
yield json.dumps(ret).encode() + b"\0"
|
228 |
-
except torch.cuda.CudaError as e:
|
229 |
-
print("Caught torch.cuda.CudaError:", e)
|
230 |
-
ret = {
|
231 |
-
"text": server_error_msg,
|
232 |
-
"error_code": 1,
|
233 |
-
}
|
234 |
-
yield json.dumps(ret).encode() + b"\0"
|
235 |
-
except Exception as e:
|
236 |
-
print("Caught Unknown Error", e)
|
237 |
-
ret = {
|
238 |
-
"text": server_error_msg,
|
239 |
-
"error_code": 1,
|
240 |
-
}
|
241 |
-
yield json.dumps(ret).encode() + b"\0"
|
242 |
-
|
243 |
-
|
244 |
-
app = FastAPI()
|
245 |
-
|
246 |
-
|
247 |
-
def release_model_semaphore(fn=None):
|
248 |
-
model_semaphore.release()
|
249 |
-
if fn is not None:
|
250 |
-
fn()
|
251 |
-
|
252 |
-
|
253 |
-
@app.post("/worker_generate_stream")
|
254 |
-
async def generate_stream(request: Request):
|
255 |
-
global model_semaphore, global_counter
|
256 |
-
global_counter += 1
|
257 |
-
params = await request.json()
|
258 |
-
|
259 |
-
if model_semaphore is None:
|
260 |
-
model_semaphore = asyncio.Semaphore(args.limit_model_concurrency)
|
261 |
-
await model_semaphore.acquire()
|
262 |
-
worker.send_heart_beat()
|
263 |
-
generator = worker.generate_stream_gate(params)
|
264 |
-
background_tasks = BackgroundTasks()
|
265 |
-
background_tasks.add_task(partial(release_model_semaphore, fn=worker.send_heart_beat))
|
266 |
-
return StreamingResponse(generator, background=background_tasks)
|
267 |
-
|
268 |
-
|
269 |
-
@app.post("/worker_get_status")
|
270 |
-
async def get_status(request: Request):
|
271 |
-
return worker.get_status()
|
272 |
-
|
273 |
-
|
274 |
-
if __name__ == "__main__":
|
275 |
-
parser = argparse.ArgumentParser()
|
276 |
-
parser.add_argument("--host", type=str, default="localhost")
|
277 |
-
parser.add_argument("--port", type=int, default=21002)
|
278 |
-
parser.add_argument("--worker-address", type=str,
|
279 |
-
default="http://localhost:21002")
|
280 |
-
parser.add_argument("--controller-address", type=str,
|
281 |
-
default="http://localhost:21001")
|
282 |
-
parser.add_argument("--model-path", type=str, default="facebook/opt-350m")
|
283 |
-
parser.add_argument("--model-base", type=str, default=None)
|
284 |
-
parser.add_argument("--model-name", type=str)
|
285 |
-
parser.add_argument("--device", type=str, default="cuda")
|
286 |
-
parser.add_argument("--multi-modal", action="store_true", help="Multimodal mode is automatically detected with model name, please make sure `llava` is included in the model path.")
|
287 |
-
parser.add_argument("--limit-model-concurrency", type=int, default=5)
|
288 |
-
parser.add_argument("--stream-interval", type=int, default=1)
|
289 |
-
parser.add_argument("--no-register", action="store_true")
|
290 |
-
parser.add_argument("--load-8bit", action="store_true")
|
291 |
-
parser.add_argument("--load-4bit", action="store_true")
|
292 |
-
parser.add_argument("--vision_tower", type=str, default="openai/clip-vit-large-patch14-336")
|
293 |
-
args = parser.parse_args()
|
294 |
-
logger.info(f"args: {args}")
|
295 |
-
|
296 |
-
if args.multi_modal:
|
297 |
-
logger.warning("Multimodal mode is automatically detected with model name, please make sure `llava` is included in the model path.")
|
298 |
-
|
299 |
-
worker = ModelWorker(args.controller_address,
|
300 |
-
args.worker_address,
|
301 |
-
worker_id,
|
302 |
-
args.no_register,
|
303 |
-
args.model_path,
|
304 |
-
args.model_base,
|
305 |
-
args.model_name,
|
306 |
-
args.load_8bit,
|
307 |
-
args.load_4bit,
|
308 |
-
args.device,
|
309 |
-
args.vision_tower)
|
310 |
-
uvicorn.run(app, host=args.host, port=args.port, log_level="info")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
LLAVA_Biovil/llava/serve/register_worker.py
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
"""
|
2 |
-
Manually register workers.
|
3 |
-
|
4 |
-
Usage:
|
5 |
-
python3 -m fastchat.serve.register_worker --controller http://localhost:21001 --worker-name http://localhost:21002
|
6 |
-
"""
|
7 |
-
|
8 |
-
import argparse
|
9 |
-
|
10 |
-
import requests
|
11 |
-
|
12 |
-
if __name__ == "__main__":
|
13 |
-
parser = argparse.ArgumentParser()
|
14 |
-
parser.add_argument("--controller-address", type=str)
|
15 |
-
parser.add_argument("--worker-name", type=str)
|
16 |
-
parser.add_argument("--check-heart-beat", action="store_true")
|
17 |
-
args = parser.parse_args()
|
18 |
-
|
19 |
-
url = args.controller_address + "/register_worker"
|
20 |
-
data = {
|
21 |
-
"worker_name": args.worker_name,
|
22 |
-
"check_heart_beat": args.check_heart_beat,
|
23 |
-
"worker_status": None,
|
24 |
-
}
|
25 |
-
r = requests.post(url, json=data)
|
26 |
-
assert r.status_code == 200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
LLAVA_Biovil/llava/serve/test_message.py
DELETED
@@ -1,62 +0,0 @@
|
|
1 |
-
import argparse
|
2 |
-
import json
|
3 |
-
|
4 |
-
import requests
|
5 |
-
|
6 |
-
from LLAV.llava.conversation import default_conversation
|
7 |
-
|
8 |
-
|
9 |
-
def main():
|
10 |
-
if args.worker_address:
|
11 |
-
worker_addr = args.worker_address
|
12 |
-
else:
|
13 |
-
controller_addr = args.controller_address
|
14 |
-
ret = requests.post(controller_addr + "/refresh_all_workers")
|
15 |
-
ret = requests.post(controller_addr + "/list_models")
|
16 |
-
models = ret.json()["models"]
|
17 |
-
models.sort()
|
18 |
-
print(f"Models: {models}")
|
19 |
-
|
20 |
-
ret = requests.post(controller_addr + "/get_worker_address",
|
21 |
-
json={"model": args.model_name})
|
22 |
-
worker_addr = ret.json()["address"]
|
23 |
-
print(f"worker_addr: {worker_addr}")
|
24 |
-
|
25 |
-
if worker_addr == "":
|
26 |
-
return
|
27 |
-
|
28 |
-
conv = default_conversation.copy()
|
29 |
-
conv.append_message(conv.roles[0], args.message)
|
30 |
-
prompt = conv.get_prompt()
|
31 |
-
|
32 |
-
headers = {"User-Agent": "LLaVA Client"}
|
33 |
-
pload = {
|
34 |
-
"model": args.model_name,
|
35 |
-
"prompt": prompt,
|
36 |
-
"max_new_tokens": args.max_new_tokens,
|
37 |
-
"temperature": 0.7,
|
38 |
-
"stop": conv.sep,
|
39 |
-
}
|
40 |
-
response = requests.post(worker_addr + "/worker_generate_stream", headers=headers,
|
41 |
-
json=pload, stream=True)
|
42 |
-
|
43 |
-
print(prompt.replace(conv.sep, "\n"), end="")
|
44 |
-
for chunk in response.iter_lines(chunk_size=8192, decode_unicode=False, delimiter=b"\0"):
|
45 |
-
if chunk:
|
46 |
-
data = json.loads(chunk.decode("utf-8"))
|
47 |
-
output = data["text"].split(conv.sep)[-1]
|
48 |
-
print(output, end="\r")
|
49 |
-
print("")
|
50 |
-
|
51 |
-
|
52 |
-
if __name__ == "__main__":
|
53 |
-
parser = argparse.ArgumentParser()
|
54 |
-
parser.add_argument("--controller-address", type=str, default="http://localhost:21001")
|
55 |
-
parser.add_argument("--worker-address", type=str)
|
56 |
-
parser.add_argument("--model-name", type=str, default="facebook/opt-350m")
|
57 |
-
parser.add_argument("--max-new-tokens", type=int, default=32)
|
58 |
-
parser.add_argument("--message", type=str, default=
|
59 |
-
"Tell me a story with more than 1000 words.")
|
60 |
-
args = parser.parse_args()
|
61 |
-
|
62 |
-
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
LLAVA_Biovil/llava/train/__init__.py
DELETED
File without changes
|
LLAVA_Biovil/llava/train/llama_flash_attn_monkey_patch.py
DELETED
@@ -1,115 +0,0 @@
|
|
1 |
-
from typing import Optional, Tuple
|
2 |
-
import warnings
|
3 |
-
|
4 |
-
import torch
|
5 |
-
|
6 |
-
import transformers
|
7 |
-
from transformers.models.llama.modeling_llama import apply_rotary_pos_emb, repeat_kv
|
8 |
-
|
9 |
-
try:
|
10 |
-
from flash_attn.flash_attn_interface import flash_attn_unpadded_qkvpacked_func
|
11 |
-
except ImportError:
|
12 |
-
from flash_attn.flash_attn_interface import flash_attn_varlen_qkvpacked_func as flash_attn_unpadded_qkvpacked_func
|
13 |
-
from flash_attn.bert_padding import unpad_input, pad_input
|
14 |
-
|
15 |
-
|
16 |
-
def forward(
|
17 |
-
self,
|
18 |
-
hidden_states: torch.Tensor,
|
19 |
-
attention_mask: Optional[torch.Tensor] = None,
|
20 |
-
position_ids: Optional[torch.Tensor] = None,
|
21 |
-
past_key_value: Optional[Tuple[torch.Tensor]] = None,
|
22 |
-
output_attentions: bool = False,
|
23 |
-
use_cache: bool = False,
|
24 |
-
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
|
25 |
-
if output_attentions:
|
26 |
-
warnings.warn(
|
27 |
-
"Output attentions is not supported for patched `LlamaAttention`, returning `None` instead."
|
28 |
-
)
|
29 |
-
|
30 |
-
bsz, q_len, _ = hidden_states.size()
|
31 |
-
|
32 |
-
query_states = (
|
33 |
-
self.q_proj(hidden_states)
|
34 |
-
.view(bsz, q_len, self.num_heads, self.head_dim)
|
35 |
-
.transpose(1, 2)
|
36 |
-
)
|
37 |
-
key_states = (
|
38 |
-
self.k_proj(hidden_states)
|
39 |
-
.view(bsz, q_len, self.num_key_value_heads, self.head_dim)
|
40 |
-
.transpose(1, 2)
|
41 |
-
)
|
42 |
-
value_states = (
|
43 |
-
self.v_proj(hidden_states)
|
44 |
-
.view(bsz, q_len, self.num_key_value_heads, self.head_dim)
|
45 |
-
.transpose(1, 2)
|
46 |
-
) # shape: (b, num_heads, s, head_dim)
|
47 |
-
|
48 |
-
kv_seq_len = key_states.shape[-2]
|
49 |
-
if past_key_value is not None:
|
50 |
-
kv_seq_len += past_key_value[0].shape[-2]
|
51 |
-
|
52 |
-
cos, sin = self.rotary_emb(value_states, seq_len=kv_seq_len)
|
53 |
-
query_states, key_states = apply_rotary_pos_emb(
|
54 |
-
query_states, key_states, cos, sin, position_ids
|
55 |
-
)
|
56 |
-
|
57 |
-
if past_key_value is not None:
|
58 |
-
# reuse k, v
|
59 |
-
key_states = torch.cat([past_key_value[0], key_states], dim=2)
|
60 |
-
value_states = torch.cat([past_key_value[1], value_states], dim=2)
|
61 |
-
|
62 |
-
past_key_value = (key_states, value_states) if use_cache else None
|
63 |
-
|
64 |
-
# repeat k/v heads if n_kv_heads < n_heads
|
65 |
-
key_states = repeat_kv(key_states, self.num_key_value_groups)
|
66 |
-
value_states = repeat_kv(value_states, self.num_key_value_groups)
|
67 |
-
|
68 |
-
# Transform the data into the format required by flash attention
|
69 |
-
qkv = torch.stack([query_states, key_states, value_states], dim=2)
|
70 |
-
qkv = qkv.transpose(1, 3) # shape: [b, s, 3, num_heads, head_dim]
|
71 |
-
key_padding_mask = attention_mask
|
72 |
-
|
73 |
-
if key_padding_mask is None:
|
74 |
-
qkv = qkv.reshape(-1, 3, self.num_heads, self.head_dim)
|
75 |
-
cu_q_lens = torch.arange(
|
76 |
-
0, (bsz + 1) * q_len, step=q_len, dtype=torch.int32, device=qkv.device
|
77 |
-
)
|
78 |
-
max_s = q_len
|
79 |
-
output = flash_attn_unpadded_qkvpacked_func(
|
80 |
-
qkv, cu_q_lens, max_s, 0.0, softmax_scale=None, causal=True
|
81 |
-
)
|
82 |
-
output = output.view(bsz, q_len, -1)
|
83 |
-
else:
|
84 |
-
qkv = qkv.reshape(bsz, q_len, -1)
|
85 |
-
qkv, indices, cu_q_lens, max_s = unpad_input(qkv, key_padding_mask)
|
86 |
-
qkv = qkv.view(-1, 3, self.num_heads, self.head_dim)
|
87 |
-
output_unpad = flash_attn_unpadded_qkvpacked_func(
|
88 |
-
qkv, cu_q_lens, max_s, 0.0, softmax_scale=None, causal=True
|
89 |
-
)
|
90 |
-
output_unpad = output_unpad.reshape(-1, self.num_heads * self.head_dim)
|
91 |
-
output = pad_input(output_unpad, indices, bsz, q_len)
|
92 |
-
|
93 |
-
return self.o_proj(output), None, past_key_value
|
94 |
-
|
95 |
-
|
96 |
-
# Disable the transformation of the attention mask in LlamaModel as the flash attention
|
97 |
-
# requires the attention mask to be the same as the key_padding_mask
|
98 |
-
def _prepare_decoder_attention_mask(
|
99 |
-
self, attention_mask, input_shape, inputs_embeds, past_key_values_length
|
100 |
-
):
|
101 |
-
# [bsz, seq_len]
|
102 |
-
return attention_mask
|
103 |
-
|
104 |
-
|
105 |
-
def replace_llama_attn_with_flash_attn():
|
106 |
-
cuda_major, cuda_minor = torch.cuda.get_device_capability()
|
107 |
-
if cuda_major < 8:
|
108 |
-
warnings.warn(
|
109 |
-
"Flash attention is only supported on A100 or H100 GPU during training due to head dim > 64 backward."
|
110 |
-
"ref: https://github.com/HazyResearch/flash-attention/issues/190#issuecomment-1523359593"
|
111 |
-
)
|
112 |
-
transformers.models.llama.modeling_llama.LlamaModel._prepare_decoder_attention_mask = (
|
113 |
-
_prepare_decoder_attention_mask
|
114 |
-
)
|
115 |
-
transformers.models.llama.modeling_llama.LlamaAttention.forward = forward
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
LLAVA_Biovil/llava/train/llama_patch.py
DELETED
@@ -1,139 +0,0 @@
|
|
1 |
-
from typing import List, Optional, Tuple
|
2 |
-
|
3 |
-
import torch
|
4 |
-
from torch import nn
|
5 |
-
import warnings
|
6 |
-
import transformers
|
7 |
-
from transformers.models.llama.modeling_llama import apply_rotary_pos_emb
|
8 |
-
from peft.tuners.lora import LoraLayer
|
9 |
-
|
10 |
-
try:
|
11 |
-
from flash_attn.flash_attn_interface import flash_attn_varlen_qkvpacked_func
|
12 |
-
from flash_attn.bert_padding import unpad_input, pad_input
|
13 |
-
except Exception:
|
14 |
-
raise ModuleNotFoundError(
|
15 |
-
"Please install FlashAttention first, e.g., with pip install flash-attn --no-build-isolation, Learn more at https://github.com/Dao-AILab/flash-attention#installation-and-features"
|
16 |
-
)
|
17 |
-
|
18 |
-
try:
|
19 |
-
from einops import rearrange
|
20 |
-
except Exception:
|
21 |
-
raise ModuleNotFoundError("Please install einops first, e.g., with pip install einops")
|
22 |
-
|
23 |
-
|
24 |
-
# ADAPTED from https://github.com/allenai/open-instruct/blob/main/open_instruct/llama_flash_attn_monkey_patch.py
|
25 |
-
# AND https://github.com/lm-sys/FastChat/blob/main/fastchat/train/llama_flash_attn_monkey_patch.py
|
26 |
-
# AND https://github.com/LAION-AI/Open-Assistant/blob/04fa9a24b2a58c8885b8aa6a2eb02b18de6b4961/model/model_training/models/patching_llama.py
|
27 |
-
# AND Sourabh https://github.com/huggingface/transformers/commit/ee81bf5aee0d65f005d157c013777e3d27d8d6bf
|
28 |
-
def forward(
|
29 |
-
self,
|
30 |
-
hidden_states: torch.Tensor,
|
31 |
-
attention_mask: Optional[torch.Tensor] = None,
|
32 |
-
position_ids: Optional[torch.Tensor] = None,
|
33 |
-
past_key_value: Optional[Tuple[torch.Tensor]] = None,
|
34 |
-
output_attentions: bool = False,
|
35 |
-
use_cache: bool = False,
|
36 |
-
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
|
37 |
-
"""Input shape: Batch x Time x Channel
|
38 |
-
|
39 |
-
attention_mask: [bsz, q_len]
|
40 |
-
"""
|
41 |
-
if output_attentions:
|
42 |
-
warnings.warn("Output attentions is not supported for patched `LlamaAttention`, returning `None` instead.")
|
43 |
-
|
44 |
-
bsz, q_len, _ = hidden_states.size()
|
45 |
-
|
46 |
-
query_states = self.q_proj(hidden_states).view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
|
47 |
-
key_states = self.k_proj(hidden_states).view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
|
48 |
-
value_states = self.v_proj(hidden_states).view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
|
49 |
-
# [bsz, q_len, nh, hd]
|
50 |
-
# [bsz, nh, q_len, hd]
|
51 |
-
|
52 |
-
kv_seq_len = key_states.shape[-2]
|
53 |
-
if past_key_value is not None:
|
54 |
-
kv_seq_len += past_key_value[0].shape[-2]
|
55 |
-
cos, sin = self.rotary_emb(value_states, seq_len=kv_seq_len)
|
56 |
-
query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin, position_ids)
|
57 |
-
|
58 |
-
# Past Key value support
|
59 |
-
if past_key_value is not None:
|
60 |
-
# reuse k, v, self_attention
|
61 |
-
key_states = torch.cat([past_key_value[0], key_states], dim=2)
|
62 |
-
value_states = torch.cat([past_key_value[1], value_states], dim=2)
|
63 |
-
|
64 |
-
past_key_value = (key_states, value_states) if use_cache else None
|
65 |
-
|
66 |
-
# Flash attention codes from
|
67 |
-
# https://github.com/HazyResearch/flash-attention/blob/main/flash_attn/flash_attention.py
|
68 |
-
|
69 |
-
# transform the data into the format required by flash attention
|
70 |
-
qkv = torch.stack([query_states, key_states, value_states], dim=2) # [bsz, nh, 3, q_len, hd]
|
71 |
-
qkv = qkv.transpose(1, 3) # [bsz, q_len, 3, nh, hd]
|
72 |
-
# We have disabled _prepare_decoder_attention_mask in LlamaModel
|
73 |
-
# the attention_mask should be the same as the key_padding_mask
|
74 |
-
key_padding_mask = attention_mask
|
75 |
-
|
76 |
-
if key_padding_mask is None:
|
77 |
-
qkv = rearrange(qkv, "b s ... -> (b s) ...")
|
78 |
-
max_s = q_len
|
79 |
-
cu_q_lens = torch.arange(0, (bsz + 1) * q_len, step=q_len, dtype=torch.int32, device=qkv.device)
|
80 |
-
output = flash_attn_varlen_qkvpacked_func(qkv, cu_q_lens, max_s, 0.0, softmax_scale=None, causal=True)
|
81 |
-
output = rearrange(output, "(b s) ... -> b s ...", b=bsz)
|
82 |
-
else:
|
83 |
-
nheads = qkv.shape[-2]
|
84 |
-
x = rearrange(qkv, "b s three h d -> b s (three h d)")
|
85 |
-
x_unpad, indices, cu_q_lens, max_s = unpad_input(x, key_padding_mask)
|
86 |
-
x_unpad = rearrange(x_unpad, "nnz (three h d) -> nnz three h d", three=3, h=nheads)
|
87 |
-
output_unpad = flash_attn_varlen_qkvpacked_func(
|
88 |
-
x_unpad, cu_q_lens, max_s, 0.0, softmax_scale=None, causal=True
|
89 |
-
)
|
90 |
-
output = rearrange(
|
91 |
-
pad_input(rearrange(output_unpad, "nnz h d -> nnz (h d)"), indices, bsz, q_len),
|
92 |
-
"b s (h d) -> b s h d",
|
93 |
-
h=nheads,
|
94 |
-
)
|
95 |
-
return self.o_proj(rearrange(output, "b s h d -> b s (h d)")), None, past_key_value
|
96 |
-
|
97 |
-
|
98 |
-
# Disable the transformation of the attention mask in LlamaModel as the flash attention
|
99 |
-
# requires the attention mask to be the same as the key_padding_mask
|
100 |
-
def _prepare_decoder_attention_mask(self, attention_mask, input_shape, inputs_embeds, past_key_values_length):
|
101 |
-
# [bsz, seq_len]
|
102 |
-
return attention_mask
|
103 |
-
|
104 |
-
|
105 |
-
def replace_attn_with_flash_attn():
|
106 |
-
cuda_major, cuda_minor = torch.cuda.get_device_capability()
|
107 |
-
if cuda_major < 8:
|
108 |
-
print(
|
109 |
-
"Flash attention is only supported on Ampere or Hopper GPU during training due to head dim > 64 backward."
|
110 |
-
"ref: https://github.com/HazyResearch/flash-attention/issues/190#issuecomment-1523359593"
|
111 |
-
)
|
112 |
-
transformers.models.llama.modeling_llama.LlamaModel._prepare_decoder_attention_mask = (
|
113 |
-
_prepare_decoder_attention_mask
|
114 |
-
)
|
115 |
-
transformers.models.llama.modeling_llama.LlamaAttention.forward = forward
|
116 |
-
|
117 |
-
|
118 |
-
def unplace_flash_attn_with_attn():
|
119 |
-
import importlib
|
120 |
-
import transformers
|
121 |
-
|
122 |
-
print("Reloading llama model, unpatching flash attention")
|
123 |
-
importlib.reload(transformers.models.llama.modeling_llama)
|
124 |
-
|
125 |
-
|
126 |
-
# Adapted from https://github.com/tmm1/axolotl/blob/2eda9e02a9d15a7a3f92b41f257d9844d72fc220/src/axolotl/utils/models.py#L338
|
127 |
-
def upcast_layer_for_flash_attention(model, torch_dtype):
|
128 |
-
# LlamaRMSNorm layers are in fp32 after kbit_training, so we need to
|
129 |
-
# convert them back to fp16/bf16 for flash-attn compatibility.
|
130 |
-
for name, module in model.named_modules():
|
131 |
-
if isinstance(module, LoraLayer):
|
132 |
-
module.to(torch_dtype)
|
133 |
-
if "norm" in name:
|
134 |
-
module.to(torch_dtype)
|
135 |
-
if "lm_head" in name or "embed_tokens" in name:
|
136 |
-
if hasattr(module, "weight"):
|
137 |
-
module.to(torch_dtype)
|
138 |
-
|
139 |
-
return model
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
LLAVA_Biovil/llava/train/llama_xformers_attn_monkey_patch.py
DELETED
@@ -1,129 +0,0 @@
|
|
1 |
-
"""
|
2 |
-
Directly copied the code from https://raw.githubusercontent.com/oobabooga/text-generation-webui/main/modules/llama_attn_hijack.py and made some adjustments
|
3 |
-
"""
|
4 |
-
|
5 |
-
import logging
|
6 |
-
import math
|
7 |
-
from typing import Optional, Tuple
|
8 |
-
|
9 |
-
import torch
|
10 |
-
import transformers.models.llama.modeling_llama
|
11 |
-
from torch import nn
|
12 |
-
|
13 |
-
try:
|
14 |
-
import xformers.ops
|
15 |
-
except ImportError:
|
16 |
-
logging.error("xformers not found! Please install it before trying to use it.")
|
17 |
-
|
18 |
-
|
19 |
-
def replace_llama_attn_with_xformers_attn():
|
20 |
-
transformers.models.llama.modeling_llama.LlamaAttention.forward = xformers_forward
|
21 |
-
|
22 |
-
|
23 |
-
def xformers_forward(
|
24 |
-
self,
|
25 |
-
hidden_states: torch.Tensor,
|
26 |
-
attention_mask: Optional[torch.Tensor] = None,
|
27 |
-
position_ids: Optional[torch.LongTensor] = None,
|
28 |
-
past_key_value: Optional[Tuple[torch.Tensor]] = None,
|
29 |
-
output_attentions: bool = False,
|
30 |
-
use_cache: bool = False,
|
31 |
-
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
|
32 |
-
# pylint: disable=duplicate-code
|
33 |
-
bsz, q_len, _ = hidden_states.size()
|
34 |
-
|
35 |
-
query_states = (
|
36 |
-
self.q_proj(hidden_states)
|
37 |
-
.view(bsz, q_len, self.num_heads, self.head_dim)
|
38 |
-
.transpose(1, 2)
|
39 |
-
)
|
40 |
-
key_states = (
|
41 |
-
self.k_proj(hidden_states)
|
42 |
-
.view(bsz, q_len, self.num_heads, self.head_dim)
|
43 |
-
.transpose(1, 2)
|
44 |
-
)
|
45 |
-
value_states = (
|
46 |
-
self.v_proj(hidden_states)
|
47 |
-
.view(bsz, q_len, self.num_heads, self.head_dim)
|
48 |
-
.transpose(1, 2)
|
49 |
-
)
|
50 |
-
|
51 |
-
kv_seq_len = key_states.shape[-2]
|
52 |
-
if past_key_value is not None:
|
53 |
-
kv_seq_len += past_key_value[0].shape[-2]
|
54 |
-
cos, sin = self.rotary_emb(value_states, seq_len=kv_seq_len)
|
55 |
-
(
|
56 |
-
query_states,
|
57 |
-
key_states,
|
58 |
-
) = transformers.models.llama.modeling_llama.apply_rotary_pos_emb(
|
59 |
-
query_states, key_states, cos, sin, position_ids
|
60 |
-
)
|
61 |
-
# [bsz, nh, t, hd]
|
62 |
-
|
63 |
-
if past_key_value is not None:
|
64 |
-
# reuse k, v, self_attention
|
65 |
-
key_states = torch.cat([past_key_value[0], key_states], dim=2)
|
66 |
-
value_states = torch.cat([past_key_value[1], value_states], dim=2)
|
67 |
-
|
68 |
-
past_key_value = (key_states, value_states) if use_cache else None
|
69 |
-
|
70 |
-
# We only apply xformers optimizations if we don't need to output the whole attention matrix
|
71 |
-
if not output_attentions:
|
72 |
-
query_states = query_states.transpose(1, 2)
|
73 |
-
key_states = key_states.transpose(1, 2)
|
74 |
-
value_states = value_states.transpose(1, 2)
|
75 |
-
|
76 |
-
# This is a nasty hack. We know attention_mask in transformers is either LowerTriangular or all Zeros.
|
77 |
-
# We therefore check if one element in the upper triangular portion is zero. If it is, then the mask is all zeros.
|
78 |
-
if attention_mask is None or attention_mask[0, 0, 0, 1] == 0:
|
79 |
-
# input and output should be of form (bsz, q_len, num_heads, head_dim)
|
80 |
-
attn_output = xformers.ops.memory_efficient_attention(
|
81 |
-
query_states, key_states, value_states, attn_bias=None
|
82 |
-
)
|
83 |
-
else:
|
84 |
-
# input and output should be of form (bsz, q_len, num_heads, head_dim)
|
85 |
-
attn_output = xformers.ops.memory_efficient_attention(
|
86 |
-
query_states,
|
87 |
-
key_states,
|
88 |
-
value_states,
|
89 |
-
attn_bias=xformers.ops.LowerTriangularMask(),
|
90 |
-
)
|
91 |
-
attn_weights = None
|
92 |
-
else:
|
93 |
-
attn_weights = torch.matmul(
|
94 |
-
query_states, key_states.transpose(2, 3)
|
95 |
-
) / math.sqrt(self.head_dim)
|
96 |
-
|
97 |
-
if attn_weights.size() != (bsz, self.num_heads, q_len, kv_seq_len):
|
98 |
-
raise ValueError(
|
99 |
-
f"Attention weights should be of size {(bsz * self.num_heads, q_len, kv_seq_len)}, but is"
|
100 |
-
f" {attn_weights.size()}"
|
101 |
-
)
|
102 |
-
|
103 |
-
if attention_mask is not None:
|
104 |
-
if attention_mask.size() != (bsz, 1, q_len, kv_seq_len):
|
105 |
-
raise ValueError(
|
106 |
-
f"Attention mask should be of size {(bsz, 1, q_len, kv_seq_len)}, but is {attention_mask.size()}"
|
107 |
-
)
|
108 |
-
attn_weights = attn_weights + attention_mask
|
109 |
-
attn_weights = torch.max(
|
110 |
-
attn_weights, torch.tensor(torch.finfo(attn_weights.dtype).min)
|
111 |
-
)
|
112 |
-
|
113 |
-
# upcast attention to fp32
|
114 |
-
attn_weights = nn.functional.softmax(
|
115 |
-
attn_weights, dim=-1, dtype=torch.float32
|
116 |
-
).to(query_states.dtype)
|
117 |
-
attn_output = torch.matmul(attn_weights, value_states)
|
118 |
-
|
119 |
-
if attn_output.size() != (bsz, self.num_heads, q_len, self.head_dim):
|
120 |
-
raise ValueError(
|
121 |
-
f"`attn_output` should be of size {(bsz, self.num_heads, q_len, self.head_dim)}, but is"
|
122 |
-
f" {attn_output.size()}"
|
123 |
-
)
|
124 |
-
|
125 |
-
attn_output = attn_output.transpose(1, 2)
|
126 |
-
|
127 |
-
attn_output = attn_output.reshape(bsz, q_len, self.hidden_size)
|
128 |
-
attn_output = self.o_proj(attn_output)
|
129 |
-
return attn_output, attn_weights, past_key_value
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
LLAVA_Biovil/llava/train/llava_trainer.py
DELETED
@@ -1,801 +0,0 @@
|
|
1 |
-
import json
|
2 |
-
import math
|
3 |
-
import os
|
4 |
-
import shutil
|
5 |
-
import sys
|
6 |
-
import time
|
7 |
-
from distutils import dist
|
8 |
-
|
9 |
-
import torch
|
10 |
-
from torch import nn
|
11 |
-
import numpy as np
|
12 |
-
|
13 |
-
from torch.utils.data import Sampler
|
14 |
-
from packaging import version
|
15 |
-
|
16 |
-
from transformers import Trainer, TrainerState, is_torch_tpu_available, is_apex_available
|
17 |
-
from transformers.debug_utils import DebugOption
|
18 |
-
from transformers.integrations import hp_params
|
19 |
-
from transformers.deepspeed import deepspeed_init, deepspeed_load_checkpoint
|
20 |
-
|
21 |
-
from transformers.trainer import (
|
22 |
-
is_sagemaker_mp_enabled,
|
23 |
-
get_parameter_names,
|
24 |
-
has_length,
|
25 |
-
ALL_LAYERNORM_LAYERS,
|
26 |
-
ShardedDDPOption,
|
27 |
-
logger, TRAINER_STATE_NAME,
|
28 |
-
)
|
29 |
-
from typing import List, Optional
|
30 |
-
|
31 |
-
from transformers.trainer_pt_utils import get_model_param_count
|
32 |
-
from transformers.trainer_utils import HPSearchBackend, speed_metrics, TrainOutput
|
33 |
-
from transformers.training_args import ParallelMode
|
34 |
-
from transformers.utils import is_accelerate_available
|
35 |
-
|
36 |
-
if is_accelerate_available():
|
37 |
-
from accelerate import Accelerator, skip_first_batches
|
38 |
-
from accelerate import __version__ as accelerate_version
|
39 |
-
from accelerate.utils import DistributedDataParallelKwargs, GradientAccumulationPlugin
|
40 |
-
|
41 |
-
if version.parse(accelerate_version) > version.parse("0.20.3"):
|
42 |
-
from accelerate.utils import (
|
43 |
-
load_fsdp_model,
|
44 |
-
load_fsdp_optimizer,
|
45 |
-
save_fsdp_model,
|
46 |
-
save_fsdp_optimizer,
|
47 |
-
)
|
48 |
-
|
49 |
-
if is_torch_tpu_available(check_device=False):
|
50 |
-
import torch_xla.core.xla_model as xm
|
51 |
-
import torch_xla.debug.metrics as met
|
52 |
-
|
53 |
-
if is_apex_available():
|
54 |
-
from apex import amp
|
55 |
-
|
56 |
-
# with open('/home/guests/chantal_pellegrini/RaDialog_LLaVA/data/train_token_freqs_radrestruct_balanced_50ep.json') as f:
|
57 |
-
# token_frequencies = json.load(f)
|
58 |
-
# token_weights = {k: 1 / v for k, v in token_frequencies.items()} # linear weighting
|
59 |
-
# print("lin weighting")
|
60 |
-
|
61 |
-
# token_weights = {k: 1 / (np.log(v) + 1) for k, v in token_frequencies.items()} # log weighting, seems to work better in this case
|
62 |
-
# print("log weighting")
|
63 |
-
token_weights = None # no weighting
|
64 |
-
print("no weighting")
|
65 |
-
|
66 |
-
if token_weights is not None:
|
67 |
-
min_weight = min(token_weights.values())
|
68 |
-
extra_token_weight = min_weight / 100 # 100 smaller than the smallest weight
|
69 |
-
|
70 |
-
|
71 |
-
def maybe_zero_3(param, ignore_status=False, name=None):
|
72 |
-
from deepspeed import zero
|
73 |
-
from deepspeed.runtime.zero.partition_parameters import ZeroParamStatus
|
74 |
-
if hasattr(param, "ds_id"):
|
75 |
-
if param.ds_status == ZeroParamStatus.NOT_AVAILABLE:
|
76 |
-
if not ignore_status:
|
77 |
-
print(name, 'no ignore status')
|
78 |
-
with zero.GatheredParameters([param]):
|
79 |
-
param = param.data.detach().cpu().clone()
|
80 |
-
else:
|
81 |
-
param = param.detach().cpu().clone()
|
82 |
-
return param
|
83 |
-
|
84 |
-
|
85 |
-
def get_mm_adapter_state_maybe_zero_3(named_params, keys_to_match):
|
86 |
-
to_return = {k: t for k, t in named_params if any(key_match in k for key_match in keys_to_match)}
|
87 |
-
to_return = {k: maybe_zero_3(v, ignore_status=True, name=k).cpu() for k, v in to_return.items()}
|
88 |
-
return to_return
|
89 |
-
|
90 |
-
|
91 |
-
def split_to_even_chunks(indices, lengths, num_chunks):
|
92 |
-
"""
|
93 |
-
Split a list of indices into `chunks` chunks of roughly equal lengths.
|
94 |
-
"""
|
95 |
-
|
96 |
-
if len(indices) % num_chunks != 0:
|
97 |
-
return [indices[i::num_chunks] for i in range(num_chunks)]
|
98 |
-
|
99 |
-
num_indices_per_chunk = len(indices) // num_chunks
|
100 |
-
|
101 |
-
chunks = [[] for _ in range(num_chunks)]
|
102 |
-
chunks_lengths = [0 for _ in range(num_chunks)]
|
103 |
-
for index in indices:
|
104 |
-
shortest_chunk = chunks_lengths.index(min(chunks_lengths))
|
105 |
-
chunks[shortest_chunk].append(index)
|
106 |
-
chunks_lengths[shortest_chunk] += lengths[index]
|
107 |
-
if len(chunks[shortest_chunk]) == num_indices_per_chunk:
|
108 |
-
chunks_lengths[shortest_chunk] = float("inf")
|
109 |
-
|
110 |
-
return chunks
|
111 |
-
|
112 |
-
|
113 |
-
def get_modality_length_grouped_indices(lengths, batch_size, world_size, generator=None):
|
114 |
-
# We need to use torch for the random part as a distributed sampler will set the random seed for torch.
|
115 |
-
assert all(l != 0 for l in lengths), "Should not have zero length."
|
116 |
-
if all(l > 0 for l in lengths) or all(l < 0 for l in lengths):
|
117 |
-
# all samples are in the same modality
|
118 |
-
return get_length_grouped_indices(lengths, batch_size, world_size, generator=generator)
|
119 |
-
mm_indices, mm_lengths = zip(*[(i, l) for i, l in enumerate(lengths) if l > 0])
|
120 |
-
lang_indices, lang_lengths = zip(*[(i, -l) for i, l in enumerate(lengths) if l < 0])
|
121 |
-
|
122 |
-
mm_shuffle = [mm_indices[i] for i in get_length_grouped_indices(mm_lengths, batch_size, world_size, generator=None)]
|
123 |
-
lang_shuffle = [lang_indices[i] for i in get_length_grouped_indices(lang_lengths, batch_size, world_size, generator=None)]
|
124 |
-
megabatch_size = world_size * batch_size
|
125 |
-
mm_megabatches = [mm_shuffle[i : i + megabatch_size] for i in range(0, len(mm_shuffle), megabatch_size)]
|
126 |
-
lang_megabatches = [lang_shuffle[i : i + megabatch_size] for i in range(0, len(lang_shuffle), megabatch_size)]
|
127 |
-
|
128 |
-
last_mm = mm_megabatches[-1]
|
129 |
-
last_lang = lang_megabatches[-1]
|
130 |
-
additional_batch = last_mm + last_lang
|
131 |
-
megabatches = mm_megabatches[:-1] + lang_megabatches[:-1]
|
132 |
-
megabatch_indices = torch.randperm(len(megabatches), generator=generator)
|
133 |
-
megabatches = [megabatches[i] for i in megabatch_indices]
|
134 |
-
|
135 |
-
if len(additional_batch) > 0:
|
136 |
-
megabatches.append(sorted(additional_batch))
|
137 |
-
|
138 |
-
return [i for megabatch in megabatches for i in megabatch]
|
139 |
-
|
140 |
-
|
141 |
-
def get_length_grouped_indices(lengths, batch_size, world_size, generator=None, merge=True):
|
142 |
-
# We need to use torch for the random part as a distributed sampler will set the random seed for torch.
|
143 |
-
indices = torch.randperm(len(lengths), generator=generator)
|
144 |
-
megabatch_size = world_size * batch_size
|
145 |
-
megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)]
|
146 |
-
megabatches = [sorted(megabatch, key=lambda i: lengths[i], reverse=True) for megabatch in megabatches]
|
147 |
-
megabatches = [split_to_even_chunks(megabatch, lengths, world_size) for megabatch in megabatches]
|
148 |
-
|
149 |
-
return [i for megabatch in megabatches for batch in megabatch for i in batch]
|
150 |
-
|
151 |
-
|
152 |
-
class LengthGroupedSampler(Sampler):
|
153 |
-
r"""
|
154 |
-
Sampler that samples indices in a way that groups together features of the dataset of roughly the same length while
|
155 |
-
keeping a bit of randomness.
|
156 |
-
"""
|
157 |
-
|
158 |
-
def __init__(
|
159 |
-
self,
|
160 |
-
batch_size: int,
|
161 |
-
world_size: int,
|
162 |
-
lengths: Optional[List[int]] = None,
|
163 |
-
generator=None,
|
164 |
-
group_by_modality: bool = False,
|
165 |
-
):
|
166 |
-
if lengths is None:
|
167 |
-
raise ValueError("Lengths must be provided.")
|
168 |
-
|
169 |
-
self.batch_size = batch_size
|
170 |
-
self.world_size = world_size
|
171 |
-
self.lengths = lengths
|
172 |
-
self.generator = generator
|
173 |
-
self.group_by_modality = group_by_modality
|
174 |
-
|
175 |
-
def __len__(self):
|
176 |
-
return len(self.lengths)
|
177 |
-
|
178 |
-
def __iter__(self):
|
179 |
-
if self.group_by_modality:
|
180 |
-
indices = get_modality_length_grouped_indices(self.lengths, self.batch_size, self.world_size, generator=self.generator)
|
181 |
-
else:
|
182 |
-
indices = get_length_grouped_indices(self.lengths, self.batch_size, self.world_size, generator=self.generator)
|
183 |
-
return iter(indices)
|
184 |
-
|
185 |
-
|
186 |
-
class LLaVATrainer(Trainer):
|
187 |
-
|
188 |
-
def compute_loss(self, model, inputs, return_outputs=False):
|
189 |
-
"""
|
190 |
-
How the loss is computed by Trainer. By default, all models return the loss in the first element.
|
191 |
-
|
192 |
-
Subclass and override for custom behavior.
|
193 |
-
"""
|
194 |
-
outputs = model(**inputs)
|
195 |
-
|
196 |
-
# Save past state if it exists
|
197 |
-
# TODO: this needs to be fixed and made cleaner later.
|
198 |
-
if self.args.past_index >= 0:
|
199 |
-
self._past = outputs[self.args.past_index]
|
200 |
-
|
201 |
-
if token_weights is not None:
|
202 |
-
# check if self has attribute vocab_weight, otherwise create
|
203 |
-
if not hasattr(self, 'vocab_weight'):
|
204 |
-
vocab = self.tokenizer.get_vocab()
|
205 |
-
self.vocab_weight = torch.ones(len(vocab)) * extra_token_weight # default weight
|
206 |
-
# map them using vocab to correct indices
|
207 |
-
for k, v in token_weights.items():
|
208 |
-
self.vocab_weight[vocab[k]] = v
|
209 |
-
self.vocab_weight = self.vocab_weight.to(self.args.device)
|
210 |
-
|
211 |
-
# Shift so that tokens < n predict n
|
212 |
-
shift_logits = outputs.logits[..., :-1, :].contiguous()
|
213 |
-
shift_labels = outputs.modified_labels[..., 1:].contiguous()
|
214 |
-
# Flatten the tokens
|
215 |
-
loss_fct = nn.CrossEntropyLoss(weight=self.vocab_weight)
|
216 |
-
shift_logits = shift_logits.view(-1, self.model.config.vocab_size)
|
217 |
-
shift_labels = shift_labels.view(-1)
|
218 |
-
# Enable model parallelism
|
219 |
-
shift_labels = shift_labels.to(shift_logits.device)
|
220 |
-
loss = loss_fct(shift_logits, shift_labels)
|
221 |
-
|
222 |
-
return (loss, outputs) if return_outputs else loss
|
223 |
-
|
224 |
-
else: #orginial compute_loss without weighting
|
225 |
-
# We don't use .loss here since the model may return tuples instead of ModelOutput.
|
226 |
-
loss = outputs["loss"] if isinstance(outputs, dict) else outputs[0]
|
227 |
-
|
228 |
-
return (loss, outputs) if return_outputs else loss
|
229 |
-
|
230 |
-
|
231 |
-
def _inner_training_loop(
|
232 |
-
self, batch_size=None, args=None, resume_from_checkpoint=None, trial=None, ignore_keys_for_eval=None
|
233 |
-
):
|
234 |
-
self.accelerator.free_memory()
|
235 |
-
self._train_batch_size = batch_size
|
236 |
-
logger.debug(f"Currently training with a batch size of: {self._train_batch_size}")
|
237 |
-
# Data loader and number of training steps
|
238 |
-
train_dataloader = self.get_train_dataloader()
|
239 |
-
|
240 |
-
# Setting up training control variables:
|
241 |
-
# number of training epochs: num_train_epochs
|
242 |
-
# number of training steps per epoch: num_update_steps_per_epoch
|
243 |
-
# total number of training steps to execute: max_steps
|
244 |
-
total_train_batch_size = self._train_batch_size * args.gradient_accumulation_steps * args.world_size
|
245 |
-
|
246 |
-
len_dataloader = None
|
247 |
-
if has_length(train_dataloader):
|
248 |
-
len_dataloader = len(train_dataloader)
|
249 |
-
num_update_steps_per_epoch = len_dataloader // args.gradient_accumulation_steps
|
250 |
-
num_update_steps_per_epoch = max(num_update_steps_per_epoch, 1)
|
251 |
-
num_examples = self.num_examples(train_dataloader)
|
252 |
-
if args.max_steps > 0:
|
253 |
-
max_steps = args.max_steps
|
254 |
-
num_train_epochs = args.max_steps // num_update_steps_per_epoch + int(
|
255 |
-
args.max_steps % num_update_steps_per_epoch > 0
|
256 |
-
)
|
257 |
-
# May be slightly incorrect if the last batch in the training dataloader has a smaller size but it's
|
258 |
-
# the best we can do.
|
259 |
-
num_train_samples = args.max_steps * total_train_batch_size
|
260 |
-
else:
|
261 |
-
max_steps = math.ceil(args.num_train_epochs * num_update_steps_per_epoch)
|
262 |
-
num_train_epochs = math.ceil(args.num_train_epochs)
|
263 |
-
num_train_samples = self.num_examples(train_dataloader) * args.num_train_epochs
|
264 |
-
elif args.max_steps > 0: # Rely on max_steps when dataloader does not have a working size
|
265 |
-
max_steps = args.max_steps
|
266 |
-
# Setting a very large number of epochs so we go as many times as necessary over the iterator.
|
267 |
-
num_train_epochs = sys.maxsize
|
268 |
-
num_update_steps_per_epoch = max_steps
|
269 |
-
num_examples = total_train_batch_size * args.max_steps
|
270 |
-
num_train_samples = args.max_steps * total_train_batch_size
|
271 |
-
else:
|
272 |
-
raise ValueError(
|
273 |
-
"args.max_steps must be set to a positive value if dataloader does not have a length, was"
|
274 |
-
f" {args.max_steps}"
|
275 |
-
)
|
276 |
-
|
277 |
-
# Compute absolute values for logging, eval, and save if given as ratio
|
278 |
-
if args.logging_steps and args.logging_steps < 1:
|
279 |
-
args.logging_steps = math.ceil(max_steps * args.logging_steps)
|
280 |
-
if args.eval_steps and args.eval_steps < 1:
|
281 |
-
args.eval_steps = math.ceil(max_steps * args.eval_steps)
|
282 |
-
if args.save_steps and args.save_steps < 1:
|
283 |
-
args.save_steps = math.ceil(max_steps * args.save_steps)
|
284 |
-
|
285 |
-
if DebugOption.UNDERFLOW_OVERFLOW in self.args.debug:
|
286 |
-
if self.args.n_gpu > 1:
|
287 |
-
# nn.DataParallel(model) replicates the model, creating new variables and module
|
288 |
-
# references registered here no longer work on other gpus, breaking the module
|
289 |
-
raise ValueError(
|
290 |
-
"Currently --debug underflow_overflow is not supported under DP. Please use DDP"
|
291 |
-
" (torch.distributed.launch)."
|
292 |
-
)
|
293 |
-
else:
|
294 |
-
debug_overflow = DebugUnderflowOverflow(self.model) # noqa
|
295 |
-
|
296 |
-
delay_optimizer_creation = (
|
297 |
-
self.sharded_ddp is not None
|
298 |
-
and self.sharded_ddp != ShardedDDPOption.SIMPLE
|
299 |
-
or is_sagemaker_mp_enabled()
|
300 |
-
or self.fsdp is not None
|
301 |
-
)
|
302 |
-
|
303 |
-
# We need to reset the scheduler, as its parameters may be different on subsequent calls
|
304 |
-
if self._created_lr_scheduler:
|
305 |
-
self.lr_scheduler = None
|
306 |
-
self._created_lr_scheduler = False
|
307 |
-
|
308 |
-
if self.is_deepspeed_enabled:
|
309 |
-
self.optimizer, self.lr_scheduler = deepspeed_init(self, num_training_steps=max_steps)
|
310 |
-
|
311 |
-
if not delay_optimizer_creation:
|
312 |
-
self.create_optimizer_and_scheduler(num_training_steps=max_steps)
|
313 |
-
|
314 |
-
self.state = TrainerState()
|
315 |
-
self.state.is_hyper_param_search = trial is not None
|
316 |
-
|
317 |
-
# Activate gradient checkpointing if needed
|
318 |
-
if args.gradient_checkpointing:
|
319 |
-
self.model.gradient_checkpointing_enable()
|
320 |
-
|
321 |
-
model = self._wrap_model(self.model_wrapped)
|
322 |
-
|
323 |
-
if is_sagemaker_mp_enabled() and resume_from_checkpoint is not None:
|
324 |
-
self._load_from_checkpoint(resume_from_checkpoint, model)
|
325 |
-
|
326 |
-
# as the model is wrapped, don't use `accelerator.prepare`
|
327 |
-
# this is for unhandled cases such as
|
328 |
-
# Fairscale Sharded DDP, FSDP-XLA, SageMaker MP/DP, DataParallel, IPEX
|
329 |
-
use_accelerator_prepare = True if model is self.model else False
|
330 |
-
|
331 |
-
if delay_optimizer_creation:
|
332 |
-
self.create_optimizer_and_scheduler(num_training_steps=max_steps)
|
333 |
-
|
334 |
-
# prepare using `accelerator` prepare
|
335 |
-
if use_accelerator_prepare:
|
336 |
-
self.model.train()
|
337 |
-
if hasattr(self.lr_scheduler, "step"):
|
338 |
-
if self.use_apex:
|
339 |
-
model = self.accelerator.prepare(self.model)
|
340 |
-
else:
|
341 |
-
model, self.optimizer = self.accelerator.prepare(self.model, self.optimizer)
|
342 |
-
else:
|
343 |
-
# to handle cases wherein we pass "DummyScheduler" such as when it is specified in DeepSpeed config.
|
344 |
-
model, self.optimizer, self.lr_scheduler = self.accelerator.prepare(
|
345 |
-
self.model, self.optimizer, self.lr_scheduler
|
346 |
-
)
|
347 |
-
|
348 |
-
if self.is_fsdp_enabled:
|
349 |
-
self.model = model
|
350 |
-
|
351 |
-
# for the rest of this function `model` is the outside model, whether it was wrapped or not
|
352 |
-
if model is not self.model:
|
353 |
-
self.model_wrapped = model
|
354 |
-
|
355 |
-
# backward compatibility
|
356 |
-
if self.is_deepspeed_enabled:
|
357 |
-
self.deepspeed = self.model_wrapped
|
358 |
-
|
359 |
-
# deepspeed ckpt loading
|
360 |
-
if resume_from_checkpoint is not None and self.is_deepspeed_enabled:
|
361 |
-
print(f"DeepSpeed info: Loading model from {resume_from_checkpoint}")
|
362 |
-
deepspeed_load_checkpoint(self.model_wrapped, resume_from_checkpoint)
|
363 |
-
# get step from opt state
|
364 |
-
# Assuming `optimizer_state_dict` is the dictionary you've loaded from the checkpoint
|
365 |
-
for param_tensor, state in self.lr_scheduler.optimizer.state.items():
|
366 |
-
step_tensor = state['step']
|
367 |
-
step_value = step_tensor.item() # Convert tensor to a Python number
|
368 |
-
print(f"Step value for a parameter tensor: {step_value}")
|
369 |
-
# Since all parameters should have been updated the same number of times,
|
370 |
-
# you can break after the first iteration
|
371 |
-
break
|
372 |
-
# step scheduler to match
|
373 |
-
for _ in range(int(step_value)):
|
374 |
-
self.lr_scheduler.step()
|
375 |
-
# Check if saved optimizer or scheduler states exist
|
376 |
-
self._load_optimizer_and_scheduler(resume_from_checkpoint)
|
377 |
-
|
378 |
-
# important: at this point:
|
379 |
-
# self.model is the Transformers Model
|
380 |
-
# self.model_wrapped is DDP(Transformers Model), Deepspeed(Transformers Model), etc.
|
381 |
-
|
382 |
-
# Train!
|
383 |
-
logger.info("***** Running training *****")
|
384 |
-
logger.info(f" Num examples = {num_examples:,}")
|
385 |
-
logger.info(f" Num Epochs = {num_train_epochs:,}")
|
386 |
-
logger.info(f" Instantaneous batch size per device = {self.args.per_device_train_batch_size:,}")
|
387 |
-
if self.args.per_device_train_batch_size != self._train_batch_size:
|
388 |
-
logger.info(f" Training with DataParallel so batch size has been adjusted to: {self._train_batch_size:,}")
|
389 |
-
logger.info(f" Total train batch size (w. parallel, distributed & accumulation) = {total_train_batch_size:,}")
|
390 |
-
logger.info(f" Gradient Accumulation steps = {args.gradient_accumulation_steps}")
|
391 |
-
logger.info(f" Total optimization steps = {max_steps:,}")
|
392 |
-
logger.info(f" Number of trainable parameters = {get_model_param_count(model, trainable_only=True):,}")
|
393 |
-
|
394 |
-
self.state.epoch = 0
|
395 |
-
start_time = time.time()
|
396 |
-
epochs_trained = 0
|
397 |
-
steps_trained_in_current_epoch = 0
|
398 |
-
steps_trained_progress_bar = None
|
399 |
-
|
400 |
-
# Check if continuing training from a checkpoint
|
401 |
-
if resume_from_checkpoint is not None and os.path.isfile(
|
402 |
-
os.path.join(resume_from_checkpoint, TRAINER_STATE_NAME)
|
403 |
-
):
|
404 |
-
self.state = TrainerState.load_from_json(os.path.join(resume_from_checkpoint, TRAINER_STATE_NAME))
|
405 |
-
epochs_trained = self.state.global_step // num_update_steps_per_epoch
|
406 |
-
if not args.ignore_data_skip:
|
407 |
-
steps_trained_in_current_epoch = self.state.global_step % (num_update_steps_per_epoch)
|
408 |
-
steps_trained_in_current_epoch *= args.gradient_accumulation_steps
|
409 |
-
else:
|
410 |
-
steps_trained_in_current_epoch = 0
|
411 |
-
|
412 |
-
logger.info(" Continuing training from checkpoint, will skip to saved global_step")
|
413 |
-
logger.info(f" Continuing training from epoch {epochs_trained}")
|
414 |
-
logger.info(f" Continuing training from global step {self.state.global_step}")
|
415 |
-
if not args.ignore_data_skip:
|
416 |
-
logger.info(
|
417 |
-
f" Will skip the first {epochs_trained} epochs then the first"
|
418 |
-
f" {steps_trained_in_current_epoch} batches in the first epoch."
|
419 |
-
)
|
420 |
-
|
421 |
-
# Update the references
|
422 |
-
self.callback_handler.model = self.model
|
423 |
-
self.callback_handler.optimizer = self.optimizer
|
424 |
-
self.callback_handler.lr_scheduler = self.lr_scheduler
|
425 |
-
self.callback_handler.train_dataloader = train_dataloader
|
426 |
-
if self.hp_name is not None and self._trial is not None:
|
427 |
-
# use self._trial because the SigOpt/Optuna hpo only call `_hp_search_setup(trial)` instead of passing trial
|
428 |
-
# parameter to Train when using DDP.
|
429 |
-
self.state.trial_name = self.hp_name(self._trial)
|
430 |
-
if trial is not None:
|
431 |
-
assignments = trial.assignments if self.hp_search_backend == HPSearchBackend.SIGOPT else trial
|
432 |
-
self.state.trial_params = hp_params(assignments)
|
433 |
-
else:
|
434 |
-
self.state.trial_params = None
|
435 |
-
# This should be the same if the state has been saved but in case the training arguments changed, it's safer
|
436 |
-
# to set this after the load.
|
437 |
-
self.state.max_steps = max_steps
|
438 |
-
self.state.num_train_epochs = num_train_epochs
|
439 |
-
self.state.is_local_process_zero = self.is_local_process_zero()
|
440 |
-
self.state.is_world_process_zero = self.is_world_process_zero()
|
441 |
-
|
442 |
-
# tr_loss is a tensor to avoid synchronization of TPUs through .item()
|
443 |
-
tr_loss = torch.tensor(0.0).to(args.device)
|
444 |
-
# _total_loss_scalar is updated everytime .item() has to be called on tr_loss and stores the sum of all losses
|
445 |
-
self._total_loss_scalar = 0.0
|
446 |
-
self._globalstep_last_logged = self.state.global_step
|
447 |
-
model.zero_grad()
|
448 |
-
|
449 |
-
self.control = self.callback_handler.on_train_begin(args, self.state, self.control)
|
450 |
-
|
451 |
-
# Skip the first epochs_trained epochs to get the random state of the dataloader at the right point.
|
452 |
-
if not args.ignore_data_skip:
|
453 |
-
for epoch in range(epochs_trained):
|
454 |
-
for _ in train_dataloader:
|
455 |
-
break
|
456 |
-
|
457 |
-
total_batched_samples = 0
|
458 |
-
for epoch in range(epochs_trained, num_train_epochs):
|
459 |
-
epoch_iterator = train_dataloader
|
460 |
-
|
461 |
-
# Reset the past mems state at the beginning of each epoch if necessary.
|
462 |
-
if args.past_index >= 0:
|
463 |
-
self._past = None
|
464 |
-
|
465 |
-
steps_in_epoch = (
|
466 |
-
len(epoch_iterator)
|
467 |
-
if len_dataloader is not None
|
468 |
-
else args.max_steps * args.gradient_accumulation_steps
|
469 |
-
)
|
470 |
-
self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
|
471 |
-
|
472 |
-
if epoch == epochs_trained and resume_from_checkpoint is not None and steps_trained_in_current_epoch == 0:
|
473 |
-
self._load_rng_state(resume_from_checkpoint)
|
474 |
-
|
475 |
-
rng_to_sync = False
|
476 |
-
steps_skipped = 0
|
477 |
-
if steps_trained_in_current_epoch > 0:
|
478 |
-
epoch_iterator = skip_first_batches(epoch_iterator, steps_trained_in_current_epoch)
|
479 |
-
steps_skipped = steps_trained_in_current_epoch
|
480 |
-
steps_trained_in_current_epoch = 0
|
481 |
-
rng_to_sync = True
|
482 |
-
|
483 |
-
step = -1
|
484 |
-
for step, inputs in enumerate(epoch_iterator):
|
485 |
-
total_batched_samples += 1
|
486 |
-
if rng_to_sync:
|
487 |
-
self._load_rng_state(resume_from_checkpoint)
|
488 |
-
rng_to_sync = False
|
489 |
-
|
490 |
-
# Skip past any already trained steps if resuming training
|
491 |
-
if steps_trained_in_current_epoch > 0:
|
492 |
-
steps_trained_in_current_epoch -= 1
|
493 |
-
if steps_trained_progress_bar is not None:
|
494 |
-
steps_trained_progress_bar.update(1)
|
495 |
-
if steps_trained_in_current_epoch == 0:
|
496 |
-
self._load_rng_state(resume_from_checkpoint)
|
497 |
-
continue
|
498 |
-
elif steps_trained_progress_bar is not None:
|
499 |
-
steps_trained_progress_bar.close()
|
500 |
-
steps_trained_progress_bar = None
|
501 |
-
|
502 |
-
if step % args.gradient_accumulation_steps == 0:
|
503 |
-
self.control = self.callback_handler.on_step_begin(args, self.state, self.control)
|
504 |
-
|
505 |
-
with self.accelerator.accumulate(model):
|
506 |
-
tr_loss_step = self.training_step(model, inputs)
|
507 |
-
|
508 |
-
if (
|
509 |
-
args.logging_nan_inf_filter
|
510 |
-
and not is_torch_tpu_available()
|
511 |
-
and (torch.isnan(tr_loss_step) or torch.isinf(tr_loss_step))
|
512 |
-
):
|
513 |
-
# if loss is nan or inf simply add the average of previous logged losses
|
514 |
-
tr_loss += tr_loss / (1 + self.state.global_step - self._globalstep_last_logged)
|
515 |
-
else:
|
516 |
-
tr_loss += tr_loss_step
|
517 |
-
|
518 |
-
self.current_flos += float(self.floating_point_ops(inputs))
|
519 |
-
|
520 |
-
is_last_step_and_steps_less_than_grad_acc = (
|
521 |
-
steps_in_epoch <= args.gradient_accumulation_steps and (step + 1) == steps_in_epoch
|
522 |
-
)
|
523 |
-
|
524 |
-
if (
|
525 |
-
total_batched_samples % args.gradient_accumulation_steps == 0
|
526 |
-
or
|
527 |
-
# last step in epoch but step is always smaller than gradient_accumulation_steps
|
528 |
-
is_last_step_and_steps_less_than_grad_acc
|
529 |
-
):
|
530 |
-
# the `or` condition of `is_last_step_and_steps_less_than_grad_acc` is not covered
|
531 |
-
# in accelerate. So, explicitly enable sync gradients to True in that case.
|
532 |
-
if is_last_step_and_steps_less_than_grad_acc or (
|
533 |
-
version.parse(accelerate_version) <= version.parse("0.20.3")
|
534 |
-
):
|
535 |
-
self.accelerator.gradient_state._set_sync_gradients(True)
|
536 |
-
|
537 |
-
# Gradient clipping
|
538 |
-
if args.max_grad_norm is not None and args.max_grad_norm > 0:
|
539 |
-
# deepspeed does its own clipping
|
540 |
-
|
541 |
-
if self.do_grad_scaling:
|
542 |
-
# Reduce gradients first for XLA
|
543 |
-
if is_torch_tpu_available():
|
544 |
-
gradients = xm._fetch_gradients(self.optimizer)
|
545 |
-
xm.all_reduce("sum", gradients, scale=1.0 / xm.xrt_world_size())
|
546 |
-
# AMP: gradients need unscaling
|
547 |
-
self.scaler.unscale_(self.optimizer)
|
548 |
-
|
549 |
-
if is_sagemaker_mp_enabled() and args.fp16:
|
550 |
-
self.optimizer.clip_master_grads(args.max_grad_norm)
|
551 |
-
elif hasattr(self.optimizer, "clip_grad_norm"):
|
552 |
-
# Some optimizers (like the sharded optimizer) have a specific way to do gradient clipping
|
553 |
-
self.optimizer.clip_grad_norm(args.max_grad_norm)
|
554 |
-
elif hasattr(model, "clip_grad_norm_"):
|
555 |
-
# Some models (like FullyShardedDDP) have a specific way to do gradient clipping
|
556 |
-
model.clip_grad_norm_(args.max_grad_norm)
|
557 |
-
elif self.use_apex:
|
558 |
-
# Revert to normal clipping otherwise, handling Apex or full precision
|
559 |
-
nn.utils.clip_grad_norm_(
|
560 |
-
amp.master_params(self.optimizer),
|
561 |
-
args.max_grad_norm,
|
562 |
-
)
|
563 |
-
else:
|
564 |
-
self.accelerator.clip_grad_norm_(
|
565 |
-
model.parameters(),
|
566 |
-
args.max_grad_norm,
|
567 |
-
)
|
568 |
-
|
569 |
-
# Optimizer step
|
570 |
-
optimizer_was_run = True
|
571 |
-
if is_torch_tpu_available():
|
572 |
-
if self.do_grad_scaling:
|
573 |
-
self.scaler.step(self.optimizer)
|
574 |
-
self.scaler.update()
|
575 |
-
else:
|
576 |
-
# tpu-comment: accelerate wrapped optimizers call xm.optimizer_step
|
577 |
-
self.optimizer.step()
|
578 |
-
elif self.do_grad_scaling:
|
579 |
-
scale_before = self.scaler.get_scale()
|
580 |
-
self.scaler.step(self.optimizer)
|
581 |
-
self.scaler.update()
|
582 |
-
scale_after = self.scaler.get_scale()
|
583 |
-
optimizer_was_run = scale_before <= scale_after
|
584 |
-
else:
|
585 |
-
self.optimizer.step()
|
586 |
-
optimizer_was_run = not self.accelerator.optimizer_step_was_skipped
|
587 |
-
|
588 |
-
if optimizer_was_run:
|
589 |
-
# Delay optimizer scheduling until metrics are generated
|
590 |
-
if not isinstance(self.lr_scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
|
591 |
-
self.lr_scheduler.step()
|
592 |
-
|
593 |
-
model.zero_grad()
|
594 |
-
self.state.global_step += 1
|
595 |
-
self.state.epoch = epoch + (step + 1 + steps_skipped) / steps_in_epoch
|
596 |
-
self.control = self.callback_handler.on_step_end(args, self.state, self.control)
|
597 |
-
|
598 |
-
self._maybe_log_save_evaluate(tr_loss, model, trial, epoch, ignore_keys_for_eval)
|
599 |
-
else:
|
600 |
-
self.control = self.callback_handler.on_substep_end(args, self.state, self.control)
|
601 |
-
|
602 |
-
if self.control.should_epoch_stop or self.control.should_training_stop:
|
603 |
-
break
|
604 |
-
if step < 0:
|
605 |
-
logger.warning(
|
606 |
-
"There seems to be not a single sample in your epoch_iterator, stopping training at step"
|
607 |
-
f" {self.state.global_step}! This is expected if you're using an IterableDataset and set"
|
608 |
-
f" num_steps ({max_steps}) higher than the number of available samples."
|
609 |
-
)
|
610 |
-
self.control.should_training_stop = True
|
611 |
-
|
612 |
-
self.control = self.callback_handler.on_epoch_end(args, self.state, self.control)
|
613 |
-
self._maybe_log_save_evaluate(tr_loss, model, trial, epoch, ignore_keys_for_eval)
|
614 |
-
|
615 |
-
if DebugOption.TPU_METRICS_DEBUG in self.args.debug:
|
616 |
-
if is_torch_tpu_available():
|
617 |
-
# tpu-comment: Logging debug metrics for PyTorch/XLA (compile, execute times, ops, etc.)
|
618 |
-
xm.master_print(met.metrics_report())
|
619 |
-
else:
|
620 |
-
logger.warning(
|
621 |
-
"You enabled PyTorch/XLA debug metrics but you don't have a TPU "
|
622 |
-
"configured. Check your training configuration if this is unexpected."
|
623 |
-
)
|
624 |
-
if self.control.should_training_stop:
|
625 |
-
break
|
626 |
-
|
627 |
-
if args.past_index and hasattr(self, "_past"):
|
628 |
-
# Clean the state at the end of training
|
629 |
-
delattr(self, "_past")
|
630 |
-
|
631 |
-
logger.info("\n\nTraining completed. Do not forget to share your model on huggingface.co/models =)\n\n")
|
632 |
-
if args.load_best_model_at_end and self.state.best_model_checkpoint is not None:
|
633 |
-
# Wait for everyone to get here so we are sur the model has been saved by process 0.
|
634 |
-
if is_torch_tpu_available():
|
635 |
-
xm.rendezvous("load_best_model_at_end")
|
636 |
-
elif args.parallel_mode == ParallelMode.DISTRIBUTED:
|
637 |
-
dist.barrier()
|
638 |
-
# elif is_sagemaker_mp_enabled():
|
639 |
-
# smp.barrier()
|
640 |
-
|
641 |
-
self._load_best_model()
|
642 |
-
|
643 |
-
# add remaining tr_loss
|
644 |
-
self._total_loss_scalar += tr_loss.item()
|
645 |
-
train_loss = self._total_loss_scalar / self.state.global_step
|
646 |
-
|
647 |
-
metrics = speed_metrics("train", start_time, num_samples=num_train_samples, num_steps=self.state.max_steps)
|
648 |
-
self.store_flos()
|
649 |
-
metrics["total_flos"] = self.state.total_flos
|
650 |
-
metrics["train_loss"] = train_loss
|
651 |
-
|
652 |
-
self.is_in_train = False
|
653 |
-
|
654 |
-
self._memory_tracker.stop_and_update_metrics(metrics)
|
655 |
-
|
656 |
-
self.log(metrics)
|
657 |
-
|
658 |
-
run_dir = self._get_output_dir(trial)
|
659 |
-
checkpoints_sorted = self._sorted_checkpoints(use_mtime=False, output_dir=run_dir)
|
660 |
-
|
661 |
-
# Delete the last checkpoint when save_total_limit=1 if it's different from the best checkpoint and process allowed to save.
|
662 |
-
if self.args.should_save and self.state.best_model_checkpoint is not None and self.args.save_total_limit == 1:
|
663 |
-
for checkpoint in checkpoints_sorted:
|
664 |
-
if checkpoint != self.state.best_model_checkpoint:
|
665 |
-
logger.info(f"Deleting older checkpoint [{checkpoint}] due to args.save_total_limit")
|
666 |
-
shutil.rmtree(checkpoint)
|
667 |
-
|
668 |
-
self.control = self.callback_handler.on_train_end(args, self.state, self.control)
|
669 |
-
|
670 |
-
return TrainOutput(self.state.global_step, train_loss, metrics)
|
671 |
-
|
672 |
-
def _get_train_sampler(self) -> Optional[torch.utils.data.Sampler]:
|
673 |
-
if self.train_dataset is None or not has_length(self.train_dataset):
|
674 |
-
return None
|
675 |
-
|
676 |
-
if self.args.group_by_modality_length:
|
677 |
-
lengths = self.train_dataset.modality_lengths
|
678 |
-
return LengthGroupedSampler(
|
679 |
-
self.args.train_batch_size,
|
680 |
-
world_size=self.args.world_size * self.args.gradient_accumulation_steps,
|
681 |
-
lengths=lengths,
|
682 |
-
group_by_modality=True,
|
683 |
-
)
|
684 |
-
else:
|
685 |
-
return super()._get_train_sampler()
|
686 |
-
|
687 |
-
def create_optimizer(self):
|
688 |
-
"""
|
689 |
-
Setup the optimizer.
|
690 |
-
|
691 |
-
We provide a reasonable default that works well. If you want to use something else, you can pass a tuple in the
|
692 |
-
Trainer's init through `optimizers`, or subclass and override this method in a subclass.
|
693 |
-
"""
|
694 |
-
if is_sagemaker_mp_enabled():
|
695 |
-
return super().create_optimizer()
|
696 |
-
if self.sharded_ddp == ShardedDDPOption.SIMPLE:
|
697 |
-
return super().create_optimizer()
|
698 |
-
|
699 |
-
opt_model = self.model
|
700 |
-
|
701 |
-
if self.optimizer is None:
|
702 |
-
decay_parameters = get_parameter_names(opt_model, ALL_LAYERNORM_LAYERS)
|
703 |
-
decay_parameters = [name for name in decay_parameters if "bias" not in name]
|
704 |
-
if self.args.mm_projector_lr is not None:
|
705 |
-
projector_parameters = [name for name, _ in opt_model.named_parameters() if "mm_projector" in name]
|
706 |
-
optimizer_grouped_parameters = [
|
707 |
-
{
|
708 |
-
"params": [
|
709 |
-
p for n, p in opt_model.named_parameters() if (n in decay_parameters and n not in projector_parameters and p.requires_grad)
|
710 |
-
],
|
711 |
-
"weight_decay": self.args.weight_decay,
|
712 |
-
},
|
713 |
-
{
|
714 |
-
"params": [
|
715 |
-
p for n, p in opt_model.named_parameters() if (n not in decay_parameters and n not in projector_parameters and p.requires_grad)
|
716 |
-
],
|
717 |
-
"weight_decay": 0.0,
|
718 |
-
},
|
719 |
-
{
|
720 |
-
"params": [
|
721 |
-
p for n, p in opt_model.named_parameters() if (n in decay_parameters and n in projector_parameters and p.requires_grad)
|
722 |
-
],
|
723 |
-
"weight_decay": self.args.weight_decay,
|
724 |
-
"lr": self.args.mm_projector_lr,
|
725 |
-
},
|
726 |
-
{
|
727 |
-
"params": [
|
728 |
-
p for n, p in opt_model.named_parameters() if (n not in decay_parameters and n in projector_parameters and p.requires_grad)
|
729 |
-
],
|
730 |
-
"weight_decay": 0.0,
|
731 |
-
"lr": self.args.mm_projector_lr,
|
732 |
-
},
|
733 |
-
]
|
734 |
-
else:
|
735 |
-
optimizer_grouped_parameters = [
|
736 |
-
{
|
737 |
-
"params": [
|
738 |
-
p for n, p in opt_model.named_parameters() if (n in decay_parameters and p.requires_grad)
|
739 |
-
],
|
740 |
-
"weight_decay": self.args.weight_decay,
|
741 |
-
},
|
742 |
-
{
|
743 |
-
"params": [
|
744 |
-
p for n, p in opt_model.named_parameters() if (n not in decay_parameters and p.requires_grad)
|
745 |
-
],
|
746 |
-
"weight_decay": 0.0,
|
747 |
-
},
|
748 |
-
]
|
749 |
-
|
750 |
-
optimizer_cls, optimizer_kwargs = Trainer.get_optimizer_cls_and_kwargs(self.args)
|
751 |
-
|
752 |
-
if self.sharded_ddp == ShardedDDPOption.SIMPLE:
|
753 |
-
self.optimizer = OSS(
|
754 |
-
params=optimizer_grouped_parameters,
|
755 |
-
optim=optimizer_cls,
|
756 |
-
**optimizer_kwargs,
|
757 |
-
)
|
758 |
-
else:
|
759 |
-
self.optimizer = optimizer_cls(optimizer_grouped_parameters, **optimizer_kwargs)
|
760 |
-
if optimizer_cls.__name__ == "Adam8bit":
|
761 |
-
import bitsandbytes
|
762 |
-
|
763 |
-
manager = bitsandbytes.optim.GlobalOptimManager.get_instance()
|
764 |
-
|
765 |
-
skipped = 0
|
766 |
-
for module in opt_model.modules():
|
767 |
-
if isinstance(module, nn.Embedding):
|
768 |
-
skipped += sum({p.data_ptr(): p.numel() for p in module.parameters()}.values())
|
769 |
-
logger.info(f"skipped {module}: {skipped/2**20}M params")
|
770 |
-
manager.register_module_override(module, "weight", {"optim_bits": 32})
|
771 |
-
logger.debug(f"bitsandbytes: will optimize {module} in fp32")
|
772 |
-
logger.info(f"skipped: {skipped/2**20}M params")
|
773 |
-
|
774 |
-
return self.optimizer
|
775 |
-
|
776 |
-
def _save_checkpoint(self, model, trial, metrics=None):
|
777 |
-
if getattr(self.args, 'tune_mm_mlp_adapter', False):
|
778 |
-
from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR
|
779 |
-
checkpoint_folder = f"{PREFIX_CHECKPOINT_DIR}-{self.state.global_step}"
|
780 |
-
|
781 |
-
run_dir = self._get_output_dir(trial=trial)
|
782 |
-
output_dir = os.path.join(run_dir, checkpoint_folder)
|
783 |
-
|
784 |
-
# Only save Adapter
|
785 |
-
keys_to_match = ['mm_projector', 'vision_resampler']
|
786 |
-
if getattr(self.args, "use_im_start_end", False):
|
787 |
-
keys_to_match.extend(['embed_tokens', 'embed_in'])
|
788 |
-
|
789 |
-
weight_to_save = get_mm_adapter_state_maybe_zero_3(self.model.named_parameters(), keys_to_match)
|
790 |
-
|
791 |
-
if self.args.local_rank == 0 or self.args.local_rank == -1:
|
792 |
-
self.model.config.save_pretrained(output_dir)
|
793 |
-
torch.save(weight_to_save, os.path.join(output_dir, f'mm_projector.bin'))
|
794 |
-
else:
|
795 |
-
super(LLaVATrainer, self)._save_checkpoint(model, trial, metrics)
|
796 |
-
|
797 |
-
def _save(self, output_dir: Optional[str] = None, state_dict=None):
|
798 |
-
if getattr(self.args, 'tune_mm_mlp_adapter', False):
|
799 |
-
pass
|
800 |
-
else:
|
801 |
-
super(LLaVATrainer, self)._save(output_dir, state_dict)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|