starriver030515 committed
Commit a501a0c · verified · 1 Parent(s): 360ea8c

Upload folder using huggingface_hub
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ coco/annotations/captions_train2017.json filter=lfs diff=lfs merge=lfs -text
+ coco/annotations/instances_train2017.json filter=lfs diff=lfs merge=lfs -text
+ coco/annotations/instances_val2017.json filter=lfs diff=lfs merge=lfs -text
+ coco/annotations/person_keypoints_train2017.json filter=lfs diff=lfs merge=lfs -text
coco/annotations/captions_train2017.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4b62086319480e0739ef390d04084515defb9c213ff13605a036061e33314317
+ size 91865115
coco/annotations/captions_val2017.json ADDED
The diff for this file is too large to render. See raw diff
 
coco/annotations/instances_train2017.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:610fce4944abdeb15354cc765333805529359d12d88f2f711393ca586901d01d
+ size 469785474
coco/annotations/instances_val2017.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e8c7f7908f1d7278341fae127d0da654f102f11bd7b21d8aeefa635b8c810b6f
+ size 19987840
coco/annotations/person_keypoints_train2017.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7fc1549d934547c470384d8a207c38707ca33fc016d00e33b795e408603af83e
+ size 238884731
coco/annotations/person_keypoints_val2017.json ADDED
The diff for this file is too large to render. See raw diff
 
coco/images/train2017/train2017.zip ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:958d3ea2e86aee31d7f32e395915482201aad3d268d848453568075f5f6fb6d5
+ size 19330868178
gen_pic.py ADDED
@@ -0,0 +1,85 @@
+ import argparse
+ from diffusers import DiffusionPipeline
+ import torch
+ import os
+ import json
+
+ parser = argparse.ArgumentParser(description="Diffusion Pipeline with Arguments")
+
+ parser.add_argument(
+     "--json_filename",
+     type=str,
+     required=True,
+     help="Path to the JSON file containing text data",
+ )
+ parser.add_argument(
+     "--cuda", type=int, required=True, help="CUDA device to use for processing"
+ )
+
+ args = parser.parse_args()
+ json_filename = args.json_filename
+ cuda_device = f"cuda:{args.cuda}"
+ print(json_filename, cuda_device)
+ # local SDXL snapshot fetched by utils/download_model.py (currently unused:
+ # the pipelines below load from the Hugging Face Hub)
+ model_path = "./sdxl"
+ image_dir = "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/images"
+ if not os.path.exists(image_dir):
+     os.makedirs(image_dir)
+
+ # the base model produces latents; the refiner finishes denoising from
+ # high_noise_frac onward (SDXL base + refiner split)
+ base = DiffusionPipeline.from_pretrained(
+     "stabilityai/stable-diffusion-xl-base-1.0",
+     torch_dtype=torch.float16,
+     variant="fp16",
+     use_safetensors=True,
+ )
+ # base.scheduler.step_schedule = {
+ #     "start": 0.5,
+ #     "end": 0.0,
+ #     "interpolation_type": "linear",
+ # }
+ base.to(cuda_device)
+
+ refiner = DiffusionPipeline.from_pretrained(
+     "stabilityai/stable-diffusion-xl-refiner-1.0",
+     text_encoder_2=base.text_encoder_2,
+     vae=base.vae,
+     torch_dtype=torch.float16,
+     use_safetensors=True,
+     variant="fp16",
+ )
+ # refiner.scheduler.step_schedule = {
+ #     "start": 0.5,
+ #     "end": 0.0,
+ #     "interpolation_type": "linear",
+ # }
+ refiner.to(cuda_device)
+
+ with open(json_filename, "r") as f:
+     text_data = json.load(f)
+
+ n_steps = 60
+ high_noise_frac = 0.8
+ guidance_scale = 20
+ for text in text_data:
+     # the caption to render is the second conversation turn (the "gpt" reply)
+     image = base(
+         prompt=text["conversations"][1]["value"],
+         num_inference_steps=n_steps,
+         denoising_end=high_noise_frac,
+         output_type="latent",
+         guidance_scale=guidance_scale,
+     ).images
+
+     image = refiner(
+         prompt=text["conversations"][1]["value"],
+         num_inference_steps=n_steps,
+         denoising_start=high_noise_frac,
+         image=image,
+         guidance_scale=guidance_scale,
+     ).images[0]
+     subdir = text["image"].split("/")[0]
+     if not os.path.exists(os.path.join(image_dir, subdir)):
+         os.makedirs(os.path.join(image_dir, subdir))
+     image_path = os.path.join(image_dir, text["image"])
+     image.save(image_path)
+
+ print("All images generated and saved successfully.")
generate.sh ADDED
@@ -0,0 +1 @@
+ srun -p s2_bigdata --gres=gpu:1 --kill-on-bad-exit=1 python gen_pic.py --json_filename=/mnt/petrelfs/zhuchenglin/diffusion/annotations/annotations_part_8.json --cuda=0
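Note: generate.sh launches a single shard (annotations_part_8.json) on one GPU. A minimal launcher for all eight shards produced by utils/split_dataset.py could look like the sketch below — a hypothetical helper, not part of this commit, assuming the same Slurm partition and annotation paths as generate.sh:

# launch_all_parts.py -- hypothetical helper, not part of this commit
import subprocess

ANNO_DIR = "/mnt/petrelfs/zhuchenglin/diffusion/annotations"  # same directory generate.sh points at

procs = []
for part in range(1, 9):  # utils/split_dataset.py writes annotations_part_1.json .. annotations_part_8.json
    cmd = [
        "srun", "-p", "s2_bigdata", "--gres=gpu:1", "--kill-on-bad-exit=1",
        "python", "gen_pic.py",
        f"--json_filename={ANNO_DIR}/annotations_part_{part}.json",
        "--cuda=0",  # each srun allocation typically exposes its single GPU as device 0
    ]
    procs.append(subprocess.Popen(cmd))

for p in procs:
    p.wait()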
utils/change_pic_name.py ADDED
@@ -0,0 +1,29 @@
+ import os
+ import shutil
+ import json
+
+ # path to the generated images
+ source_image_folder = "/mnt/petrelfs/zhuchenglin/diffusion/images_large"
+ # path to the target folder
+ target_image_folder = (
+     "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/images"
+ )
+ # path to the COCO annotations file
+ annotations_coco_path = (
+     "/mnt/petrelfs/zhuchenglin/diffusion/coco/annotations/captions_train2017.json"
+ )
+ with open(annotations_coco_path, "r") as f:
+     annotations = json.load(f)
+
+ # rename generated images ({image_id}_{annotation_id}_gen.jpg) to sequential
+ # {index}.jpg names in the LLaVA pretraining image folder
+ for index, annotation in enumerate(annotations["annotations"][:200000]):
+     print(index)
+     image_id, pid = annotation["image_id"], annotation["id"]
+     source_image_path = os.path.join(
+         source_image_folder, f"{image_id:012d}_{pid}_gen.jpg"
+     )
+     target_image_path = os.path.join(target_image_folder, f"{index}.jpg")
+     if os.path.exists(source_image_path):
+         shutil.copy(source_image_path, target_image_path)
utils/copy_pic.py ADDED
@@ -0,0 +1,22 @@
+ import os
+ import shutil
+ import json
+
+ source_folder = '/mnt/petrelfs/zhuchenglin/diffusion/coco/images/train2017'
+ target_folder = '/mnt/petrelfs/zhuchenglin/diffusion/images_large'
+
+ if not os.path.exists(target_folder):
+     os.makedirs(target_folder)
+
+ anno_json_path = "/mnt/petrelfs/zhuchenglin/diffusion/coco/annotations/captions_train2017.json"
+ with open(anno_json_path, 'r') as f:
+     annotation_data = json.load(f)
+ annotations = annotation_data["annotations"][:200000]
+
+ count = 0
+ for image in annotations:
+     source_path = os.path.join(source_folder, f'{image["image_id"]:012}.jpg')
+     target_path = os.path.join(target_folder, f'{image["image_id"]:012}.jpg')
+     count += 1
+     print(source_path, count)
+     shutil.copy(source_path, target_path)
utils/count.py ADDED
@@ -0,0 +1,19 @@
+ import os
+
+ def count_images(directory):
+     # supported image file extensions
+     image_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp'}
+     image_count = 0
+
+     # walk the given directory and all of its subdirectories
+     for root, dirs, files in os.walk(directory):
+         for file in files:
+             # check whether the file extension is in the supported set
+             if os.path.splitext(file)[1].lower() in image_extensions:
+                 image_count += 1
+
+     return image_count
+
+ # example usage: replace the path below with your own directory
+ directory_path = '/mnt/petrelfs/zhuchenglin/diffusion/images_large'
+ print(f"Total images in '{directory_path}':", count_images(directory_path))
utils/download_coco.py ADDED
@@ -0,0 +1,67 @@
+ import subprocess
+ import json
+ import os
+ import requests
+
+ # configuration
+ base_url = "https://datasets-server.huggingface.co/rows"
+ dataset_path = "cat-state/mscoco-1st-caption"
+ config = "default"
+ split = "train"
+ offset = 0
+ length = 100
+ total_data = 1000  # total number of rows to fetch
+ iterations = total_data // length  # number of request batches needed
+
+ image_dir = "../images_large"
+ if not os.path.exists(image_dir):
+     os.makedirs(image_dir)
+
+ text_data = {}
+
+ # loop over batches until all rows have been fetched
+ for i in range(iterations):
+     # build the request URL
+     url = f"{base_url}?dataset={dataset_path}&config={config}&split={split}&offset={offset}&length={length}"
+
+     # fetch one batch of rows
+     result = subprocess.run(
+         ["curl", "-X", "GET", url],
+         capture_output=True,
+         text=True
+     )
+
+     output = result.stdout
+
+     try:
+         data_dict = json.loads(output)
+     except json.JSONDecodeError:
+         print(f"Could not parse the response as JSON. Output was: {output}")
+         continue
+
+     if 'rows' in data_dict:
+         for item in data_dict['rows']:
+             row_idx = item['row_idx']
+             row = item['row']
+             image_url = row.get('url')
+             text = row.get('caption')
+
+             if image_url:
+                 image_filename = f"{image_dir}/{row_idx}_row_image.jpg"
+                 response = requests.get(image_url, stream=True)
+                 if response.status_code == 200:
+                     with open(image_filename, 'wb') as f:
+                         for chunk in response.iter_content(chunk_size=8192):
+                             f.write(chunk)
+
+                 text_data[f"{row_idx}_row_image"] = text
+
+     offset += length  # advance the offset to fetch the next batch
+
+ # save the caption data
+ json_filename = "../data/row_image_texts_large.json"
+ with open(json_filename, 'w') as f:
+     json.dump(text_data, f, indent=4)
+
+ print("Images downloaded and captions saved to row_image_texts_large.json")
utils/download_model.py ADDED
@@ -0,0 +1,6 @@
+ from huggingface_hub import snapshot_download
+ snapshot_download(
+     repo_id="stabilityai/stable-diffusion-xl-base-1.0",
+     local_dir="../sdxl",
+     max_workers=8
+ )
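gen_pic.py also loads stabilityai/stable-diffusion-xl-refiner-1.0, so a companion prefetch in the same style may be useful — a hypothetical addition, not in this commit, with an assumed local_dir:

# hypothetical companion download (not in this commit); local_dir is an assumed path
from huggingface_hub import snapshot_download

snapshot_download(
    repo_id="stabilityai/stable-diffusion-xl-refiner-1.0",
    local_dir="../sdxl-refiner",
    max_workers=8,
)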
utils/expand_dataset.py ADDED
@@ -0,0 +1,65 @@
+ import os
+ import shutil
+ import json
+ import random
+
+ # prompts from llava
+ prompt_for_image = [
+     "Describe the image concisely.",
+     "Provide a brief description of the given image.",
+     "Offer a succinct explanation of the picture presented.",
+     "Summarize the visual content of the image.",
+     "Give a short and clear explanation of the subsequent image.",
+     "Share a concise interpretation of the image provided.",
+     "Present a compact description of the photo's key features.",
+     "Relay a brief, clear account of the picture shown.",
+     "Render a clear and concise summary of the photo.",
+     "Write a terse but informative summary of the picture.",
+     "Create a compact narrative representing the image presented.",
+ ]
+
+ # path to the generated images
+ source_folder = "/mnt/petrelfs/zhuchenglin/diffusion/images_large1"
+ # path to the llava training images, which has the initial 660 subfolders
+ target_folder = "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/images"
+ # path to the llava training annotations folder
+ target_anno_folder = "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain"
+ # path to the COCO annotations file
+ annotations_coco_path = (
+     "/mnt/petrelfs/zhuchenglin/diffusion/coco/annotations/captions_train2017.json"
+ )
+ with open(annotations_coco_path, "r") as f:
+     annotations = json.load(f)
+
+ new_annotations = []
+ for index, annotation in enumerate(annotations["annotations"][:500000]):
+     print(index)
+     # new subfolders start at index 900 (past the initial 660), 10000 images each
+     folder_index = 900 + (index // 10000)
+     target_subfolder = f"{folder_index:05d}"
+
+     # format of the image name: 00000xxxx.jpg
+     target_image_name = f"{folder_index:05d}{index % 10000:04d}.jpg"
+     target_image_path = os.path.join(target_folder, target_subfolder, target_image_name)
+     if not os.path.exists(os.path.join(target_folder, target_subfolder)):
+         os.makedirs(os.path.join(target_folder, target_subfolder))
+
+     # the default name of generated images is index.jpg
+     source_image_path = os.path.join(source_folder, f"{index}.jpg")
+     if os.path.exists(source_image_path):
+         shutil.copy(source_image_path, target_image_path)
+
+     random_prompt = random.choice(prompt_for_image)
+     new_annotation = {
+         "id": f"{target_subfolder}{index % 10000:04d}",
+         "image": f"{target_subfolder}/{target_image_name}",
+         "conversations": [
+             {"from": "human", "value": f"{random_prompt}\n<image>"},
+             {"from": "gpt", "value": annotation["caption"]},
+         ],
+     }
+     new_annotations.append(new_annotation)
+
+ json_file_path = os.path.join(target_anno_folder, "coco_annotations_500k.json")
+ with open(json_file_path, "w") as json_file:
+     json.dump(new_annotations, json_file, indent=4)
utils/merge_json.py ADDED
@@ -0,0 +1,35 @@
+ import json
+ import random
+
+ # load the first JSON file
+ json1_path = "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/blip_laion_cc_sbu_558k.json"
+ json2_path = "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/mscoco_raw_200k.json"
+ with open(json1_path) as f:
+     data1 = json.load(f)
+
+ # load the second JSON file
+ with open(json2_path) as f:
+     data2 = json.load(f)
+
+ # check the types of data1 and data2
+ if isinstance(data1, dict) and isinstance(data2, dict):
+     # merge the two dicts
+     merged_data = {**data1, **data2}
+ elif isinstance(data1, list) and isinstance(data2, list):
+     # concatenate the two lists
+     merged_data = data1 + data2
+ else:
+     raise TypeError("data1 and data2 should be of the same type, either list or dict")
+
+ random.shuffle(merged_data)
+
+ # write the merged JSON to a new file
+ with open(
+     "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/llava_coco_raw_758k.json",
+     "w",
+ ) as f:
+     json.dump(merged_data, f)
+ # reload the merged file to sanity-check its length
+ json_path = "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/llava_coco_raw_758k.json"
+ with open(json_path) as f:
+     data1 = json.load(f)
+ print(len(data1))
utils/split_dataset.py ADDED
@@ -0,0 +1,30 @@
+ import json
+ import os
+ import math
+
+ anno_json_path = (
+     "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/llava_gen_558k.json"
+ )
+ with open(anno_json_path, "r") as f:
+     annotation_data = json.load(f)
+
+ total_annotations = len(annotation_data)
+ num_parts = 8
+ annotations_per_part = math.ceil(total_annotations / num_parts)
+
+ anno_output_dir = "../annotations/"
+ if not os.path.exists(anno_output_dir):
+     os.makedirs(anno_output_dir)
+
+ for i in range(num_parts):
+     start_idx = i * annotations_per_part
+     end_idx = min((i + 1) * annotations_per_part, total_annotations)
+     annotations_subset = annotation_data[start_idx:end_idx]
+     part_anno_json_path = os.path.join(
+         anno_output_dir, f"annotations_part_{i + 1}.json"
+     )
+     with open(part_anno_json_path, "w") as f:
+         json.dump(annotations_subset, f)
+     print(len(annotations_subset))
+
+ print("Annotations successfully split into 8 parts and saved to the output folder.")
utils/sum.py ADDED
@@ -0,0 +1,6 @@
+ import os
+ # count the files in each pretraining image subfolder (01000 .. 01058)
+ for i in range(1000, 1059):
+     total_files = 0
+     for root, dirs, files in os.walk(f"/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/images/0{i}"):
+         total_files += len(files)
+     print(total_files)