starriver030515 committed
Commit a501a0c · verified · 1 Parent(s): 360ea8c

Upload folder using huggingface_hub
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ coco/annotations/captions_train2017.json filter=lfs diff=lfs merge=lfs -text
+ coco/annotations/instances_train2017.json filter=lfs diff=lfs merge=lfs -text
+ coco/annotations/instances_val2017.json filter=lfs diff=lfs merge=lfs -text
+ coco/annotations/person_keypoints_train2017.json filter=lfs diff=lfs merge=lfs -text
coco/annotations/captions_train2017.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4b62086319480e0739ef390d04084515defb9c213ff13605a036061e33314317
+ size 91865115
coco/annotations/captions_val2017.json ADDED
The diff for this file is too large to render. See raw diff
 
coco/annotations/instances_train2017.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:610fce4944abdeb15354cc765333805529359d12d88f2f711393ca586901d01d
+ size 469785474
coco/annotations/instances_val2017.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e8c7f7908f1d7278341fae127d0da654f102f11bd7b21d8aeefa635b8c810b6f
+ size 19987840
coco/annotations/person_keypoints_train2017.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7fc1549d934547c470384d8a207c38707ca33fc016d00e33b795e408603af83e
+ size 238884731
coco/annotations/person_keypoints_val2017.json ADDED
The diff for this file is too large to render. See raw diff
 
coco/images/train2017/train2017.zip ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:958d3ea2e86aee31d7f32e395915482201aad3d268d848453568075f5f6fb6d5
+ size 19330868178
gen_pic.py ADDED
@@ -0,0 +1,85 @@
+ import argparse
+ from diffusers import DiffusionPipeline
+ import torch
+ import os
+ import json
+
+ parser = argparse.ArgumentParser(description="Diffusion Pipeline with Arguments")
+
+ parser.add_argument(
+     "--json_filename",
+     type=str,
+     required=True,
+     help="Path to the JSON file containing text data",
+ )
+ parser.add_argument(
+     "--cuda", type=int, required=True, help="CUDA device to use for processing"
+ )
+
+ args = parser.parse_args()
+ json_filename = args.json_filename
+ cuda_device = f"cuda:{args.cuda}"
+ print(json_filename, cuda_device)
+ # local SDXL snapshot fetched by utils/download_model.py (currently unused:
+ # the pipelines below load from the Hugging Face Hub)
+ model_path = "./sdxl"
+ image_dir = "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/images"
+ if not os.path.exists(image_dir):
+     os.makedirs(image_dir)
+
+ # the base model produces latents; the refiner finishes denoising from
+ # high_noise_frac onward (SDXL base + refiner split)
+ base = DiffusionPipeline.from_pretrained(
+     "stabilityai/stable-diffusion-xl-base-1.0",
+     torch_dtype=torch.float16,
+     variant="fp16",
+     use_safetensors=True,
+ )
+ # base.scheduler.step_schedule = {
+ #     "start": 0.5,
+ #     "end": 0.0,
+ #     "interpolation_type": "linear",
+ # }
+ base.to(cuda_device)
+
+ refiner = DiffusionPipeline.from_pretrained(
+     "stabilityai/stable-diffusion-xl-refiner-1.0",
+     text_encoder_2=base.text_encoder_2,
+     vae=base.vae,
+     torch_dtype=torch.float16,
+     use_safetensors=True,
+     variant="fp16",
+ )
+ # refiner.scheduler.step_schedule = {
+ #     "start": 0.5,
+ #     "end": 0.0,
+ #     "interpolation_type": "linear",
+ # }
+ refiner.to(cuda_device)
+
+ with open(json_filename, "r") as f:
+     text_data = json.load(f)
+
+ n_steps = 60
+ high_noise_frac = 0.8
+ guidance_scale = 20
+ for text in text_data:
+     # the caption to render is the second conversation turn (the "gpt" reply)
+     image = base(
+         prompt=text["conversations"][1]["value"],
+         num_inference_steps=n_steps,
+         denoising_end=high_noise_frac,
+         output_type="latent",
+         guidance_scale=guidance_scale,
+     ).images
+
+     image = refiner(
+         prompt=text["conversations"][1]["value"],
+         num_inference_steps=n_steps,
+         denoising_start=high_noise_frac,
+         image=image,
+         guidance_scale=guidance_scale,
+     ).images[0]
+     subdir = text["image"].split("/")[0]
+     if not os.path.exists(os.path.join(image_dir, subdir)):
+         os.makedirs(os.path.join(image_dir, subdir))
+     image_path = os.path.join(image_dir, text["image"])
+     image.save(image_path)
+
+ print("All images generated and saved successfully.")
generate.sh ADDED
@@ -0,0 +1 @@
+ srun -p s2_bigdata --gres=gpu:1 --kill-on-bad-exit=1 python gen_pic.py --json_filename=/mnt/petrelfs/zhuchenglin/diffusion/annotations/annotations_part_8.json --cuda=0
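Note: generate.sh launches a single shard (annotations_part_8.json) on one GPU. A minimal launcher for all eight shards produced by utils/split_dataset.py could look like the sketch below — a hypothetical helper, not part of this commit, assuming the same Slurm partition and annotation paths as generate.sh:

# launch_all_parts.py -- hypothetical helper, not part of this commit
import subprocess

ANNO_DIR = "/mnt/petrelfs/zhuchenglin/diffusion/annotations"  # same directory generate.sh points at

procs = []
for part in range(1, 9):  # utils/split_dataset.py writes annotations_part_1.json .. annotations_part_8.json
    cmd = [
        "srun", "-p", "s2_bigdata", "--gres=gpu:1", "--kill-on-bad-exit=1",
        "python", "gen_pic.py",
        f"--json_filename={ANNO_DIR}/annotations_part_{part}.json",
        "--cuda=0",  # each srun allocation typically exposes its single GPU as device 0
    ]
    procs.append(subprocess.Popen(cmd))

for p in procs:
    p.wait()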
utils/change_pic_name.py ADDED
@@ -0,0 +1,29 @@
+ import os
+ import shutil
+ import json
+
+ # path to the generated images
+ source_image_folder = "/mnt/petrelfs/zhuchenglin/diffusion/images_large"
+ # path to the target folder
+ target_image_folder = (
+     "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/images"
+ )
+ # path to the COCO annotations file
+ annotations_coco_path = (
+     "/mnt/petrelfs/zhuchenglin/diffusion/coco/annotations/captions_train2017.json"
+ )
+ with open(annotations_coco_path, "r") as f:
+     annotations = json.load(f)
+
+ # rename generated images ({image_id}_{annotation_id}_gen.jpg) to sequential
+ # {index}.jpg names in the LLaVA pretraining image folder
+ for index, annotation in enumerate(annotations["annotations"][:200000]):
+     print(index)
+     image_id, pid = annotation["image_id"], annotation["id"]
+     source_image_path = os.path.join(
+         source_image_folder, f"{image_id:012d}_{pid}_gen.jpg"
+     )
+     target_image_path = os.path.join(target_image_folder, f"{index}.jpg")
+     if os.path.exists(source_image_path):
+         shutil.copy(source_image_path, target_image_path)
utils/copy_pic.py ADDED
@@ -0,0 +1,22 @@
+ import os
+ import shutil
+ import json
+
+ source_folder = '/mnt/petrelfs/zhuchenglin/diffusion/coco/images/train2017'
+ target_folder = '/mnt/petrelfs/zhuchenglin/diffusion/images_large'
+
+ if not os.path.exists(target_folder):
+     os.makedirs(target_folder)
+
+ anno_json_path = "/mnt/petrelfs/zhuchenglin/diffusion/coco/annotations/captions_train2017.json"
+ with open(anno_json_path, 'r') as f:
+     annotation_data = json.load(f)
+ annotations = annotation_data["annotations"][:200000]
+
+ count = 0
+ for image in annotations:
+     source_path = os.path.join(source_folder, f'{image["image_id"]:012}.jpg')
+     target_path = os.path.join(target_folder, f'{image["image_id"]:012}.jpg')
+     count += 1
+     print(source_path, count)
+     shutil.copy(source_path, target_path)
utils/count.py ADDED
@@ -0,0 +1,19 @@
+ import os
+
+ def count_images(directory):
+     # supported image file extensions
+     image_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp'}
+     image_count = 0
+
+     # walk the given directory and all of its subdirectories
+     for root, dirs, files in os.walk(directory):
+         for file in files:
+             # check whether the file extension is in the supported set
+             if os.path.splitext(file)[1].lower() in image_extensions:
+                 image_count += 1
+
+     return image_count
+
+ # example usage: replace the path below with your own directory
+ directory_path = '/mnt/petrelfs/zhuchenglin/diffusion/images_large'
+ print(f"Total images in '{directory_path}':", count_images(directory_path))
utils/download_coco.py ADDED
@@ -0,0 +1,67 @@
+ import subprocess
+ import json
+ import os
+ import requests
+
+ # configuration
+ base_url = "https://datasets-server.huggingface.co/rows"
+ dataset_path = "cat-state/mscoco-1st-caption"
+ config = "default"
+ split = "train"
+ offset = 0
+ length = 100
+ total_data = 1000  # total number of rows to fetch
+ iterations = total_data // length  # number of request batches needed
+
+ image_dir = "../images_large"
+ if not os.path.exists(image_dir):
+     os.makedirs(image_dir)
+
+ text_data = {}
+
+ # loop over batches until all rows have been fetched
+ for i in range(iterations):
+     # build the request URL
+     url = f"{base_url}?dataset={dataset_path}&config={config}&split={split}&offset={offset}&length={length}"
+
+     # fetch one batch of rows
+     result = subprocess.run(
+         ["curl", "-X", "GET", url],
+         capture_output=True,
+         text=True
+     )
+
+     output = result.stdout
+
+     try:
+         data_dict = json.loads(output)
+     except json.JSONDecodeError:
+         print(f"Could not parse the response as JSON. Output was: {output}")
+         continue
+
+     if 'rows' in data_dict:
+         for item in data_dict['rows']:
+             row_idx = item['row_idx']
+             row = item['row']
+             image_url = row.get('url')
+             text = row.get('caption')
+
+             if image_url:
+                 image_filename = f"{image_dir}/{row_idx}_row_image.jpg"
+                 response = requests.get(image_url, stream=True)
+                 if response.status_code == 200:
+                     with open(image_filename, 'wb') as f:
+                         for chunk in response.iter_content(chunk_size=8192):
+                             f.write(chunk)
+
+                 text_data[f"{row_idx}_row_image"] = text
+
+     offset += length  # advance the offset to fetch the next batch
+
+ # save the caption data
+ json_filename = "../data/row_image_texts_large.json"
+ with open(json_filename, 'w') as f:
+     json.dump(text_data, f, indent=4)
+
+ print("Images downloaded and captions saved to row_image_texts_large.json")
utils/download_model.py ADDED
@@ -0,0 +1,6 @@
+ from huggingface_hub import snapshot_download
+ snapshot_download(
+     repo_id="stabilityai/stable-diffusion-xl-base-1.0",
+     local_dir="../sdxl",
+     max_workers=8
+ )
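gen_pic.py also loads stabilityai/stable-diffusion-xl-refiner-1.0, so a companion prefetch in the same style may be useful — a hypothetical addition, not in this commit, with an assumed local_dir:

# hypothetical companion download (not in this commit); local_dir is an assumed path
from huggingface_hub import snapshot_download

snapshot_download(
    repo_id="stabilityai/stable-diffusion-xl-refiner-1.0",
    local_dir="../sdxl-refiner",
    max_workers=8,
)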
utils/expand_dataset.py ADDED
@@ -0,0 +1,65 @@
+ import os
+ import shutil
+ import json
+ import random
+
+ # prompts from llava
+ prompt_for_image = [
+     "Describe the image concisely.",
+     "Provide a brief description of the given image.",
+     "Offer a succinct explanation of the picture presented.",
+     "Summarize the visual content of the image.",
+     "Give a short and clear explanation of the subsequent image.",
+     "Share a concise interpretation of the image provided.",
+     "Present a compact description of the photo's key features.",
+     "Relay a brief, clear account of the picture shown.",
+     "Render a clear and concise summary of the photo.",
+     "Write a terse but informative summary of the picture.",
+     "Create a compact narrative representing the image presented.",
+ ]
+
+ # path to the generated images
+ source_folder = "/mnt/petrelfs/zhuchenglin/diffusion/images_large1"
+ # path to the llava training images, which has the initial 660 subfolders
+ target_folder = "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/images"
+ # path to the llava training annotations folder
+ target_anno_folder = "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain"
+ # path to the COCO annotations file
+ annotations_coco_path = (
+     "/mnt/petrelfs/zhuchenglin/diffusion/coco/annotations/captions_train2017.json"
+ )
+ with open(annotations_coco_path, "r") as f:
+     annotations = json.load(f)
+
+ new_annotations = []
+ for index, annotation in enumerate(annotations["annotations"][:500000]):
+     print(index)
+     # new subfolders start at index 900 (past the initial 660), 10000 images each
+     folder_index = 900 + (index // 10000)
+     target_subfolder = f"{folder_index:05d}"
+
+     # format of the image name: 00000xxxx.jpg
+     target_image_name = f"{folder_index:05d}{index % 10000:04d}.jpg"
+     target_image_path = os.path.join(target_folder, target_subfolder, target_image_name)
+     if not os.path.exists(os.path.join(target_folder, target_subfolder)):
+         os.makedirs(os.path.join(target_folder, target_subfolder))
+
+     # the default name of generated images is index.jpg
+     source_image_path = os.path.join(source_folder, f"{index}.jpg")
+     if os.path.exists(source_image_path):
+         shutil.copy(source_image_path, target_image_path)
+
+     random_prompt = random.choice(prompt_for_image)
+     new_annotation = {
+         "id": f"{target_subfolder}{index % 10000:04d}",
+         "image": f"{target_subfolder}/{target_image_name}",
+         "conversations": [
+             {"from": "human", "value": f"{random_prompt}\n<image>"},
+             {"from": "gpt", "value": annotation["caption"]},
+         ],
+     }
+     new_annotations.append(new_annotation)
+
+ json_file_path = os.path.join(target_anno_folder, "coco_annotations_500k.json")
+ with open(json_file_path, "w") as json_file:
+     json.dump(new_annotations, json_file, indent=4)
utils/merge_json.py ADDED
@@ -0,0 +1,35 @@
+ import json
+ import random
+
+ # load the first JSON file
+ json1_path = "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/blip_laion_cc_sbu_558k.json"
+ json2_path = "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/mscoco_raw_200k.json"
+ with open(json1_path) as f:
+     data1 = json.load(f)
+
+ # load the second JSON file
+ with open(json2_path) as f:
+     data2 = json.load(f)
+
+ # check the types of data1 and data2
+ if isinstance(data1, dict) and isinstance(data2, dict):
+     # merge the two dicts
+     merged_data = {**data1, **data2}
+ elif isinstance(data1, list) and isinstance(data2, list):
+     # concatenate the two lists
+     merged_data = data1 + data2
+ else:
+     raise TypeError("data1 and data2 should be of the same type, either list or dict")
+
+ random.shuffle(merged_data)
+
+ # write the merged JSON to a new file
+ with open(
+     "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/llava_coco_raw_758k.json",
+     "w",
+ ) as f:
+     json.dump(merged_data, f)
+ # reload the merged file to sanity-check its length
+ json_path = "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/llava_coco_raw_758k.json"
+ with open(json_path) as f:
+     data1 = json.load(f)
+ print(len(data1))
utils/split_dataset.py ADDED
@@ -0,0 +1,30 @@
+ import json
+ import os
+ import math
+
+ anno_json_path = (
+     "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/llava_gen_558k.json"
+ )
+ with open(anno_json_path, "r") as f:
+     annotation_data = json.load(f)
+
+ total_annotations = len(annotation_data)
+ num_parts = 8
+ annotations_per_part = math.ceil(total_annotations / num_parts)
+
+ anno_output_dir = "../annotations/"
+ if not os.path.exists(anno_output_dir):
+     os.makedirs(anno_output_dir)
+
+ for i in range(num_parts):
+     start_idx = i * annotations_per_part
+     end_idx = min((i + 1) * annotations_per_part, total_annotations)
+     annotations_subset = annotation_data[start_idx:end_idx]
+     part_anno_json_path = os.path.join(
+         anno_output_dir, f"annotations_part_{i + 1}.json"
+     )
+     with open(part_anno_json_path, "w") as f:
+         json.dump(annotations_subset, f)
+     print(len(annotations_subset))
+
+ print("Annotations successfully split into 8 parts and saved to the output folder.")
utils/sum.py ADDED
@@ -0,0 +1,6 @@
+ import os
+ # count the files in each pretraining image subfolder (01000 .. 01058)
+ for i in range(1000, 1059):
+     total_files = 0
+     for root, dirs, files in os.walk(f"/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/images/0{i}"):
+         total_files += len(files)
+     print(total_files)