henry000 committed on
Commit
3637d28
·
1 Parent(s): 1dfe70c

🔨 [Add] Converter to Transform JSON to txt format

Files changed (1)
  1. utils/converter_json2txt.py +84 -0
utils/converter_json2txt.py ADDED
@@ -0,0 +1,84 @@
+ import json
+ import os
+ from typing import Dict, List, Optional
+
+ from tqdm import tqdm
+
+
+ def discretize_categories(categories: List[Dict[str, int]]) -> Dict[int, int]:
+     """
+     Maps each unique 'id' in the list of category dictionaries to a sequential integer index.
+     Indices are assigned based on the sorted 'id' values.
+     """
+     sorted_categories = sorted(categories, key=lambda category: category["id"])
+     return {category["id"]: index for index, category in enumerate(sorted_categories)}
+
+
+ def process_annotations(
+     image_annotations: Dict[int, List[Dict]],
+     image_info_dict: Dict[int, tuple],
+     output_dir: str,
+     id_to_idx: Optional[Dict[int, int]] = None,
+ ) -> None:
+     """
+     Process and save annotations to files, with an option to remap category IDs.
+     """
+     for image_id, annotations in tqdm(image_annotations.items(), desc="Processing annotations"):
+         file_path = os.path.join(output_dir, f"{image_id:0>12}.txt")
+         with open(file_path, "w") as file:
+             for annotation in annotations:
+                 process_annotation(annotation, image_info_dict[image_id], id_to_idx, file)
+
+
+ def process_annotation(annotation: Dict, image_dims: tuple, id_to_idx: Optional[Dict[int, int]], file) -> None:
+     """
+     Convert a single annotation's segmentation and write it to the open file handle.
+     """
+     category_id = annotation["category_id"]
+     segmentation = (
+         annotation["segmentation"][0]
+         if annotation["segmentation"] and isinstance(annotation["segmentation"][0], list)
+         else None
+     )
+
+     if segmentation is None:
+         return
+
+     img_width, img_height = image_dims
+     normalized_segmentation = normalize_segmentation(segmentation, img_width, img_height)
+
+     if id_to_idx:
+         category_id = id_to_idx.get(category_id, category_id)
+
+     file.write(f"{category_id} {' '.join(normalized_segmentation)}\n")
+
+
+ def normalize_segmentation(segmentation: List[float], img_width: int, img_height: int) -> List[str]:
+     """
+     Normalize and format segmentation coordinates.
+     """
+     return [f"{x/img_width:.6f}" if i % 2 == 0 else f"{x/img_height:.6f}" for i, x in enumerate(segmentation)]
+
+
+ def convert_annotations(json_file: str, output_dir: str) -> None:
+     """
+     Load annotation data from a JSON file and process all annotations.
+     """
+     with open(json_file) as file:
+         data = json.load(file)
+
+     os.makedirs(output_dir, exist_ok=True)
+
+     image_info_dict = {img["id"]: (img["width"], img["height"]) for img in data.get("images", [])}
+     id_to_idx = discretize_categories(data.get("categories", [])) if "categories" in data else None
+     image_annotations = {img_id: [] for img_id in image_info_dict}
+
+     for annotation in data.get("annotations", []):
+         if not annotation.get("iscrowd", False):
+             image_annotations[annotation["image_id"]].append(annotation)
+
+     process_annotations(image_annotations, image_info_dict, output_dir, id_to_idx)
+
+
+ convert_annotations("./data/coco/annotations/instances_train2017.json", "./data/coco/labels/train2017/")
+ convert_annotations("./data/coco/annotations/instances_val2017.json", "./data/coco/labels/val2017/")
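For reference, a minimal sketch (not part of the commit) of reading back one of the converted label files. Each line written by the converter has the form "<class index> x1 y1 x2 y2 ...", with polygon coordinates normalized to [0, 1] by image width and height; the file name below is hypothetical.

import os

# Hypothetical label file produced by convert_annotations(); adjust the path as needed.
label_path = "./data/coco/labels/train2017/000000000009.txt"

if os.path.exists(label_path):
    with open(label_path) as f:
        for line in f:
            parts = line.split()
            class_idx = int(parts[0])                       # remapped category index
            coords = [float(v) for v in parts[1:]]          # normalized coordinates
            polygon = list(zip(coords[0::2], coords[1::2])) # (x, y) pairs
            print(class_idx, polygon[:3], "...")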