sdsdsdadasd3 commited on
Commit
c186cfb
β€’
1 Parent(s): abfb3a3

[Add] Add scripts for preparing benchmark datasets.

Browse files
.gitignore CHANGED
@@ -166,4 +166,6 @@ cython_debug/
166
  /eval/*csv
167
  *__pycache__
168
  scripts/
169
- eval/
 
 
 
166
  /eval/*csv
167
  *__pycache__
168
  scripts/
169
+ eval/
170
+ *.DS_Store
171
+ benchmark/datasets
README.md CHANGED
@@ -27,6 +27,7 @@ arXiv preprint, 2024
27
 
28
  ## πŸ”† Introduction
29
 
 
30
  - [24-9-18] Add point cloud sequence visualization.
31
  - [24-9-14] πŸ”₯πŸ”₯πŸ”₯ **DepthCrafter** is released now, have fun!
32
 
 
27
 
28
  ## πŸ”† Introduction
29
 
30
+ - [24-9-19] Add scripts for preparing benchmark datasets.
31
  - [24-9-18] Add point cloud sequence visualization.
32
  - [24-9-14] πŸ”₯πŸ”₯πŸ”₯ **DepthCrafter** is released now, have fun!
33
 
benchmark/dataset_extract_bonn.py ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import numpy as np
3
+ import os.path as osp
4
+ from PIL import Image
5
+ from tqdm import tqdm
6
+ import imageio
7
+ import csv
8
+
9
+
10
+ def depth_read(filename):
11
+ # loads depth map D from png file
12
+ # and returns it as a numpy array
13
+
14
+ depth_png = np.asarray(Image.open(filename))
15
+ # make sure we have a proper 16bit depth map here.. not 8bit!
16
+ assert np.max(depth_png) > 255
17
+
18
+ depth = depth_png.astype(np.float64) / 5000.0
19
+ depth[depth_png == 0] = -1.0
20
+ return depth
21
+
22
+
23
def extract_bonn(
    root,
    depth_root,
    sample_len=-1,
    csv_save_path="",
    datatset_name="",
    saved_rgb_dir="",
    saved_disp_dir="",
    start_frame=0,
    end_frame=110,
):
    """Convert Bonn RGB-D sequences into RGB mp4 clips and depth .npz files.

    For every scene directory under ``depth_root`` the frames in
    ``[start_frame, end_frame)`` are split into windows of ``sample_len``
    frames (the whole sequence when ``sample_len <= 0``); each window is
    written as ``<seq>_rgb_left.mp4`` plus ``<seq>_disparity.npz`` and an
    index CSV is saved to ``csv_save_path``.
    """

    def _even(num):
        # mp4 encoders require even frame dimensions; crop one pixel if odd.
        return num if num % 2 == 0 else num - 1

    scenes_names = os.listdir(depth_root)
    all_samples = []
    for seq_name in tqdm(scenes_names):
        # Frame names are not zero-padded, so sort numerically by the last
        # four digits of the stem rather than lexicographically.
        all_img_names = os.listdir(osp.join(depth_root, seq_name, "rgb"))
        all_img_names = [x for x in all_img_names if x.endswith(".png")]
        print(f"sequence frame number: {len(all_img_names)}")
        all_img_names = sorted(all_img_names, key=lambda x: int(x.split(".")[0][-4:]))
        all_img_names = all_img_names[start_frame:end_frame]

        all_depth_names = os.listdir(osp.join(depth_root, seq_name, "depth"))
        all_depth_names = [x for x in all_depth_names if x.endswith(".png")]
        print(f"sequence depth number: {len(all_depth_names)}")
        all_depth_names = sorted(
            all_depth_names, key=lambda x: int(x.split(".")[0][-4:])
        )
        all_depth_names = all_depth_names[start_frame:end_frame]

        seq_len = len(all_img_names)
        step = sample_len if sample_len > 0 else seq_len

        for ref_idx in range(0, seq_len, step):
            print(f"Progress: {seq_name}, {ref_idx // step + 1} / {seq_len//step}")

            # Skip the trailing window when it would be shorter than `step`.
            if (ref_idx + step) > seq_len:
                continue
            ref_e = ref_idx + step

            video_imgs = []
            video_depths = []
            for idx in range(ref_idx, ref_e):
                im_path = osp.join(root, seq_name, "rgb", all_img_names[idx])
                depth_path = osp.join(
                    depth_root, seq_name, "depth", all_depth_names[idx]
                )
                video_depths.append(depth_read(depth_path))
                video_imgs.append(np.array(Image.open(im_path)))

            disp_video = np.array(video_depths)[:, None]  # (T, 1, H, W)
            img_video = np.array(video_imgs)[..., 0:3]  # (T, H, W, 3)

            print(disp_video.max(), disp_video.min())

            height = _even(disp_video.shape[-2])
            width = _even(disp_video.shape[-1])
            disp_video = disp_video[:, :, 0:height, 0:width]
            img_video = img_video[:, 0:height, 0:width]

            data_root = saved_rgb_dir + datatset_name
            disp_root = saved_disp_dir + datatset_name
            os.makedirs(data_root, exist_ok=True)
            os.makedirs(disp_root, exist_ok=True)

            img_video_path = os.path.join(data_root, f"{seq_name}_rgb_left.mp4")
            disp_video_path = os.path.join(disp_root, f"{seq_name}_disparity.npz")

            imageio.mimsave(
                img_video_path, img_video, fps=15, quality=9, macro_block_size=1
            )
            np.savez(disp_video_path, disparity=disp_video)

            # CSV paths are relative to the dataset root, not absolute.
            all_samples.append(
                {
                    "filepath_left": f"{datatset_name}/{seq_name}_rgb_left.mp4",
                    "filepath_disparity": f"{datatset_name}/{seq_name}_disparity.npz",
                }
            )

    # newline="" per csv module docs: prevents blank rows on Windows.
    os.makedirs(os.path.dirname(csv_save_path), exist_ok=True)
    fields = ["filepath_left", "filepath_disparity"]
    with open(csv_save_path, "w", newline="") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fields)
        writer.writeheader()
        writer.writerows(all_samples)

    print(f"{csv_save_path} has been saved.")
142
+
143
+
144
if __name__ == "__main__":
    # Placeholder paths: point `root`/`depth_root` at your local Bonn RGB-D copy.
    extract_bonn(
        root="path/to/Bonn-RGBD",
        depth_root="path/to/Bonn-RGBD",
        saved_rgb_dir="./benchmark/datasets/",
        saved_disp_dir="./benchmark/datasets/",
        csv_save_path="./benchmark/datasets/bonn.csv",
        sample_len=-1,
        datatset_name="bonn",
        start_frame=30,
        end_frame=140,
    )
benchmark/dataset_extract_kitti.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import numpy as np
3
+ import os.path as osp
4
+ from PIL import Image
5
+ from tqdm import tqdm
6
+ import csv
7
+ import imageio
8
+
9
+
10
def depth_read(filename):
    """Load a KITTI ground-truth depth map from a 16-bit PNG file.

    Returns depth in meters as a float64 numpy array; pixels with no
    ground truth (raw value 0) are set to -1.0.
    """
    depth_png = np.array(Image.open(filename), dtype=int)
    # KITTI depth is stored as 16-bit PNG; an 8-bit file indicates a wrong input.
    # Raise instead of assert so the check survives `python -O`.
    if np.max(depth_png) <= 255:
        raise ValueError(
            f"{filename}: expected 16-bit depth PNG, got 8-bit-range values"
        )

    depth = depth_png.astype(np.float64) / 256.0  # KITTI depth scale factor
    depth[depth_png == 0] = -1.0  # mark pixels without ground truth
    return depth
21
+
22
+
23
def extract_kitti(
    root,
    depth_root,
    sample_len=-1,
    csv_save_path="",
    datatset_name="",
    saved_rgb_dir="",
    saved_disp_dir="",
    start_frame=0,
    end_frame=110,
):
    """Convert KITTI depth sequences into RGB mp4 clips and depth .npz files.

    Reads annotated depth from ``depth_root/<seq>/proj_depth/groundtruth/image_02``
    and the matching raw RGB frames from ``root/<date>/<seq>/image_02/data``,
    windows frames ``[start_frame, end_frame)`` by ``sample_len`` (whole
    sequence when ``sample_len <= 0``), writes mp4/npz pairs, and saves an
    index CSV to ``csv_save_path``.
    """

    def _even(num):
        # mp4 encoders require even frame dimensions; crop one pixel if odd.
        return num if num % 2 == 0 else num - 1

    scenes_names = os.listdir(depth_root)
    all_samples = []
    for seq_name in tqdm(scenes_names):
        all_img_names = os.listdir(
            osp.join(depth_root, seq_name, "proj_depth/groundtruth/image_02")
        )
        all_img_names = [x for x in all_img_names if x.endswith(".png")]
        print(f"sequence frame number: {len(all_img_names)}")

        # Sort numerically by the last four digits of the stem.
        all_img_names = sorted(all_img_names, key=lambda x: int(x.split(".")[0][-4:]))
        all_img_names = all_img_names[start_frame:end_frame]

        seq_len = len(all_img_names)
        step = sample_len if sample_len > 0 else seq_len

        for ref_idx in range(0, seq_len, step):
            print(f"Progress: {seq_name}, {ref_idx // step + 1} / {seq_len//step}")

            # Skip the trailing window when it would be shorter than `step`.
            if (ref_idx + step) > seq_len:
                continue
            ref_e = ref_idx + step

            video_imgs = []
            video_depths = []
            for idx in range(ref_idx, ref_e):
                # seq_name starts with the drive date (e.g. 2011_09_26_...),
                # which is the parent directory in the raw-data layout.
                im_path = osp.join(
                    root, seq_name[0:10], seq_name, "image_02/data", all_img_names[idx]
                )
                depth_path = osp.join(
                    depth_root,
                    seq_name,
                    "proj_depth/groundtruth/image_02",
                    all_img_names[idx],
                )
                video_depths.append(depth_read(depth_path))
                video_imgs.append(np.array(Image.open(im_path)))

            disp_video = np.array(video_depths)[:, None]  # (T, 1, H, W)
            img_video = np.array(video_imgs)[..., 0:3]  # (T, H, W, 3)

            height = _even(disp_video.shape[-2])
            width = _even(disp_video.shape[-1])
            disp_video = disp_video[:, :, 0:height, 0:width]
            img_video = img_video[:, 0:height, 0:width]

            data_root = saved_rgb_dir + datatset_name
            disp_root = saved_disp_dir + datatset_name
            os.makedirs(data_root, exist_ok=True)
            os.makedirs(disp_root, exist_ok=True)

            img_video_path = os.path.join(data_root, f"{seq_name}_rgb_left.mp4")
            disp_video_path = os.path.join(disp_root, f"{seq_name}_disparity.npz")

            imageio.mimsave(
                img_video_path, img_video, fps=15, quality=10, macro_block_size=1
            )
            np.savez(disp_video_path, disparity=disp_video)

            # Use datatset_name (the directory actually written to) rather
            # than a hardcoded "KITTI" so the CSV stays consistent with the
            # output layout; the shipped caller passes datatset_name="KITTI".
            all_samples.append(
                {
                    "filepath_left": f"{datatset_name}/{seq_name}_rgb_left.mp4",
                    "filepath_disparity": f"{datatset_name}/{seq_name}_disparity.npz",
                }
            )

    # newline="" per csv module docs: prevents blank rows on Windows.
    os.makedirs(os.path.dirname(csv_save_path), exist_ok=True)
    fields = ["filepath_left", "filepath_disparity"]
    with open(csv_save_path, "w", newline="") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fields)
        writer.writeheader()
        writer.writerows(all_samples)

    print(f"{csv_save_path} has been saved.")
127
+
128
+
129
if __name__ == "__main__":
    # Placeholder paths: point them at your local KITTI raw + depth_annotated copy.
    extract_kitti(
        root="path/to/KITTI/raw_data",
        depth_root="path/to/KITTI/data_depth_annotated/val",
        saved_rgb_dir="./benchmark/datasets/",
        saved_disp_dir="./benchmark/datasets/",
        csv_save_path="./benchmark/datasets/KITTI.csv",
        sample_len=-1,
        datatset_name="KITTI",
        start_frame=0,
        end_frame=110,
    )
benchmark/dataset_extract_nyu.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import numpy as np
3
+ import os.path as osp
4
+ from PIL import Image
5
+ from tqdm import tqdm
6
+ import csv
7
+ import imageio
8
+
9
+
10
def _read_image(img_rel_path) -> np.ndarray:
    """Load the image at *img_rel_path* and return its pixels as a numpy array."""
    return np.asarray(Image.open(img_rel_path))
15
+
16
+
17
def depth_read(filename):
    """Read a depth image and convert its values from millimeters to meters."""
    return _read_image(filename) / 1000.0
21
+
22
+
23
def extract_nyu(
    root,
    depth_root,
    csv_save_path="",
    datatset_name="",
    filename_ls_path="",
    saved_rgb_dir="",
    saved_disp_dir="",
):
    """Convert NYUv2 test frames into single-frame mp4 clips and depth .npz files.

    ``filename_ls_path`` lists one sample per line; column 0 is the RGB path
    relative to ``root`` and column 2 the filled-depth path relative to
    ``depth_root``. Each frame becomes a one-frame mp4 plus an npz, and an
    index CSV is written to ``csv_save_path``.
    """
    with open(filename_ls_path, "r") as f:
        filenames = [s.split() for s in f.readlines()]

    all_samples = []
    for pair_names in tqdm(filenames):
        img_name = pair_names[0]
        filled_depth_name = pair_names[2]

        im_path = osp.join(root, img_name)
        depth_path = osp.join(depth_root, filled_depth_name)

        # Single-frame "video": depth (1, 1, H, W), rgb (1, H, W, 3).
        disp_video = np.array([depth_read(depth_path)])[:, None]
        img_video = np.array([np.array(Image.open(im_path))])[..., 0:3]

        # Crop the white projection border of NYUv2 frames.
        disp_video = disp_video[:, :, 45:471, 41:601]
        img_video = img_video[:, 45:471, 41:601, :]

        data_root = saved_rgb_dir + datatset_name
        disp_root = saved_disp_dir + datatset_name
        os.makedirs(data_root, exist_ok=True)
        os.makedirs(disp_root, exist_ok=True)

        img_video_path = os.path.join(data_root, f"{img_name[:-4]}_rgb_left.mp4")
        disp_video_path = os.path.join(disp_root, f"{img_name[:-4]}_disparity.npz")

        # img_name may contain subdirectories (e.g. scene/frame.png).
        os.makedirs(os.path.dirname(img_video_path), exist_ok=True)
        os.makedirs(os.path.dirname(disp_video_path), exist_ok=True)

        imageio.mimsave(
            img_video_path, img_video, fps=15, quality=10, macro_block_size=1
        )
        np.savez(disp_video_path, disparity=disp_video)

        # CSV paths are relative to the dataset root, not absolute.
        all_samples.append(
            {
                "filepath_left": f"{datatset_name}/{img_name[:-4]}_rgb_left.mp4",
                "filepath_disparity": f"{datatset_name}/{img_name[:-4]}_disparity.npz",
            }
        )

    # newline="" per csv module docs: prevents blank rows on Windows.
    os.makedirs(os.path.dirname(csv_save_path), exist_ok=True)
    fields = ["filepath_left", "filepath_disparity"]
    with open(csv_save_path, "w", newline="") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fields)
        writer.writeheader()
        writer.writerows(all_samples)

    print(f"{csv_save_path} has been saved.")
95
+
96
+
97
if __name__ == "__main__":
    # Placeholder paths: point them at your local NYUv2 copy.
    extract_nyu(
        root="path/to/NYUv2/",
        depth_root="path/to/NYUv2/",
        filename_ls_path="path/to/NYUv2/filename_list_test.txt",
        saved_rgb_dir="./benchmark/datasets/",
        saved_disp_dir="./benchmark/datasets/",
        csv_save_path="./benchmark/datasets/NYUv2.csv",
        datatset_name="NYUv2",
    )
benchmark/dataset_extract_scannet.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import numpy as np
3
+ import os.path as osp
4
+ from PIL import Image
5
+ from tqdm import tqdm
6
+ import csv
7
+ import imageio
8
+
9
+
10
def _read_image(img_rel_path) -> np.ndarray:
    """Load the image at *img_rel_path* and return it as a numpy array [H, W, rgb]."""
    return np.asarray(Image.open(img_rel_path))
15
+
16
+
17
def depth_read(filename):
    """Read a depth image and convert its values from millimeters to meters."""
    return _read_image(filename) / 1000.0
21
+
22
+
23
def extract_scannet(
    root,
    sample_len=-1,
    csv_save_path="",
    datatset_name="",
    scene_number=16,
    scene_frames_len=120,
    stride=1,
    saved_rgb_dir="",
    saved_disp_dir="",
):
    """Convert ScanNet scans into RGB mp4 clips and depth .npz files.

    Takes the first ``scene_number`` scenes under ``root`` (sorted by name),
    keeps every ``stride``-th of the first ``scene_frames_len`` frames,
    windows them by ``sample_len`` (whole sequence when ``sample_len <= 0``),
    writes mp4/npz pairs, and saves an index CSV to ``csv_save_path``.
    """
    scenes_names = sorted(os.listdir(root))[:scene_number]
    all_samples = []
    for seq_name in tqdm(scenes_names):
        all_img_names = os.listdir(osp.join(root, seq_name, "color"))
        all_img_names = [x for x in all_img_names if x.endswith(".jpg")]
        # Frame stems are plain integers, so sort numerically.
        all_img_names = sorted(all_img_names, key=lambda x: int(x.split(".")[0]))
        all_img_names = all_img_names[:scene_frames_len:stride]
        print(f"sequence frame number: {len(all_img_names)}")

        seq_len = len(all_img_names)
        step = sample_len if sample_len > 0 else seq_len

        for ref_idx in range(0, seq_len, step):
            print(f"Progress: {seq_name}, {ref_idx // step + 1} / {seq_len//step}")

            # Skip the trailing window when it would be shorter than `step`.
            if (ref_idx + step) > seq_len:
                continue
            ref_e = ref_idx + step

            video_imgs = []
            video_depths = []
            for idx in range(ref_idx, ref_e):
                im_path = osp.join(root, seq_name, "color", all_img_names[idx])
                # Depth frames share the stem but are PNG instead of JPG.
                depth_path = osp.join(
                    root, seq_name, "depth", all_img_names[idx][:-3] + "png"
                )
                video_depths.append(depth_read(depth_path))
                video_imgs.append(np.array(Image.open(im_path)))

            disp_video = np.array(video_depths)[:, None]  # (T, 1, H, W)
            img_video = np.array(video_imgs)[..., 0:3]  # (T, H, W, 3)

            # Crop the border region of ScanNet frames.
            disp_video = disp_video[:, :, 8:-8, 11:-11]
            img_video = img_video[:, 8:-8, 11:-11, :]

            data_root = saved_rgb_dir + datatset_name
            disp_root = saved_disp_dir + datatset_name
            os.makedirs(data_root, exist_ok=True)
            os.makedirs(disp_root, exist_ok=True)

            img_video_path = os.path.join(data_root, f"{seq_name}_rgb_left.mp4")
            disp_video_path = os.path.join(disp_root, f"{seq_name}_disparity.npz")

            imageio.mimsave(
                img_video_path, img_video, fps=15, quality=9, macro_block_size=1
            )
            np.savez(disp_video_path, disparity=disp_video)

            # CSV paths are relative to the dataset root, not absolute.
            all_samples.append(
                {
                    "filepath_left": f"{datatset_name}/{seq_name}_rgb_left.mp4",
                    "filepath_disparity": f"{datatset_name}/{seq_name}_disparity.npz",
                }
            )

    # newline="" per csv module docs: prevents blank rows on Windows.
    os.makedirs(os.path.dirname(csv_save_path), exist_ok=True)
    fields = ["filepath_left", "filepath_disparity"]
    with open(csv_save_path, "w", newline="") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fields)
        writer.writeheader()
        writer.writerows(all_samples)

    print(f"{csv_save_path} has been saved.")
111
+
112
+
113
if __name__ == "__main__":
    # Placeholder paths: point `root` at your local ScanNet v2 test scans.
    extract_scannet(
        root="path/to/ScanNet_v2/raw/scans_test",
        saved_rgb_dir="./benchmark/datasets/",
        saved_disp_dir="./benchmark/datasets/",
        csv_save_path="./benchmark/datasets/scannet.csv",
        sample_len=-1,
        datatset_name="scannet",
        scene_number=100,
        scene_frames_len=90 * 3,
        stride=3,
    )
benchmark/dataset_extract_sintel.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+
4
+ # This source code is licensed under the license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+ # # Data loading based on https://github.com/NVIDIA/flownet2-pytorch
7
+
8
+
9
+ import os
10
+ import numpy as np
11
+ import os.path as osp
12
+ from PIL import Image
13
+ from tqdm import tqdm
14
+ import csv
15
+ import imageio
16
+
17
+
18
# Check for endianness, based on Daniel Scharstein's optical flow code.
# Using little-endian architecture, these two should be equal.
TAG_FLOAT = 202021.25
TAG_CHAR = "PIEH"


def depth_read(filename):
    """Read depth data from a Sintel .dpt file, return as a (H, W) float32 array.

    File layout: float32 magic tag, int32 width, int32 height, then
    width*height float32 depth values. Raises ValueError on a wrong magic
    tag or implausible dimensions (raise instead of assert so the checks
    survive `python -O`; the file is also closed via `with`, fixing the
    original handle leak).
    """
    with open(filename, "rb") as f:
        check = np.fromfile(f, dtype=np.float32, count=1)[0]
        if check != TAG_FLOAT:
            raise ValueError(
                " depth_read:: Wrong tag in flow file (should be: {0}, is: {1}). Big-endian machine? ".format(
                    TAG_FLOAT, check
                )
            )
        width = np.fromfile(f, dtype=np.int32, count=1)[0]
        height = np.fromfile(f, dtype=np.int32, count=1)[0]
        size = width * height
        if not (width > 0 and height > 0 and 1 < size < 100000000):
            raise ValueError(
                " depth_read:: Wrong input size (width = {0}, height = {1}).".format(
                    width, height
                )
            )
        depth = np.fromfile(f, dtype=np.float32, count=-1).reshape((height, width))
    return depth
43
+
44
+
45
def extract_sintel(
    root,
    depth_root,
    sample_len=-1,
    csv_save_path="",
    datatset_name="",
    saved_rgb_dir="",
    saved_disp_dir="",
):
    """Convert MPI-Sintel sequences into RGB mp4 clips and depth .npz files.

    For every scene under ``root`` the PNG frames are paired with the .dpt
    depth files under ``depth_root``, windowed by ``sample_len`` frames
    (whole sequence when ``sample_len <= 0``), written as mp4/npz pairs, and
    indexed in a CSV saved to ``csv_save_path``.
    """
    scenes_names = os.listdir(root)
    all_samples = []
    for seq_name in tqdm(scenes_names):
        all_img_names = os.listdir(os.path.join(root, seq_name))
        all_img_names = [x for x in all_img_names if x.endswith(".png")]
        # Sort numerically by the last four digits of the stem.
        all_img_names = sorted(all_img_names, key=lambda x: int(x.split(".")[0][-4:]))

        seq_len = len(all_img_names)
        step = sample_len if sample_len > 0 else seq_len

        for ref_idx in range(0, seq_len, step):
            print(f"Progress: {seq_name}, {ref_idx // step} / {seq_len // step}")

            # Skip the trailing window when it would be shorter than `step`.
            if (ref_idx + step) > seq_len:
                continue
            ref_e = ref_idx + step

            video_imgs = []
            video_depths = []
            for idx in range(ref_idx, ref_e):
                im_path = osp.join(root, seq_name, all_img_names[idx])
                # Depth files share the frame stem with a .dpt extension.
                depth_path = osp.join(
                    depth_root, seq_name, all_img_names[idx][:-3] + "dpt"
                )
                video_depths.append(depth_read(depth_path))
                video_imgs.append(np.array(Image.open(im_path)))

            disp_video = np.array(video_depths)[:, None]  # (T, 1, H, W)
            img_video = np.array(video_imgs)[..., 0:3]  # (T, H, W, 3)

            data_root = saved_rgb_dir + datatset_name
            disp_root = saved_disp_dir + datatset_name
            os.makedirs(data_root, exist_ok=True)
            os.makedirs(disp_root, exist_ok=True)

            img_video_path = os.path.join(data_root, f"{seq_name}_rgb_left.mp4")
            disp_video_path = os.path.join(disp_root, f"{seq_name}_disparity.npz")

            imageio.mimsave(
                img_video_path, img_video, fps=15, quality=10, macro_block_size=1
            )
            np.savez(disp_video_path, disparity=disp_video)

            # CSV paths are relative to the dataset root, not absolute.
            all_samples.append(
                {
                    "filepath_left": f"{datatset_name}/{seq_name}_rgb_left.mp4",
                    "filepath_disparity": f"{datatset_name}/{seq_name}_disparity.npz",
                }
            )

    # newline="" per csv module docs: prevents blank rows on Windows.
    os.makedirs(os.path.dirname(csv_save_path), exist_ok=True)
    fields = ["filepath_left", "filepath_disparity"]
    with open(csv_save_path, "w", newline="") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fields)
        writer.writeheader()
        writer.writerows(all_samples)

    print(f"{csv_save_path} has been saved.")
126
+
127
+
128
if __name__ == "__main__":
    # Placeholder paths: point them at your local MPI-Sintel depth training set.
    extract_sintel(
        root="path/to/Sintel-Depth/training_image/clean",
        depth_root="path/to/Sintel-Depth/MPI-Sintel-depth-training-20150305/training/depth",
        saved_rgb_dir="./benchmark/datasets/",
        saved_disp_dir="./benchmark/datasets/",
        csv_save_path="./benchmark/datasets/sintel.csv",
        sample_len=-1,
        datatset_name="sintel",
    )
visualization_pcd.py β†’ visualization/visualization_pcd.py RENAMED
@@ -1,8 +1,7 @@
1
  """Record3D visualizer
2
  """
3
- import os
4
  import time
5
- from pathlib import Path
6
  from decord import VideoReader, cpu
7
 
8
  import numpy as np
@@ -14,24 +13,23 @@ from tqdm.auto import tqdm
14
 
15
 
16
  def main(
17
- data_path: str = "/apdcephfs_cq10/share_1290939/vg_share/reynli/cache/video_demo/video_depth",
18
- vid_name: str = "01_dog",
19
  downsample_factor: int = 8,
20
  max_frames: int = 100,
21
  share: bool = False,
22
- point_size = 0.01
23
  ) -> None:
24
-
25
  server = viser.ViserServer()
26
  if share:
27
  server.request_share_url()
28
 
29
  print("Loading frames!")
30
- dis_path = data_path + '/' + vid_name + '.npz'
31
- vid_path = data_path + '/' + vid_name + '_rgb.mp4'
32
- # vid_path = data_path + '/' + vid_name + '.mp4'
33
-
34
- disp_map = np.load(dis_path)['depth'][:, :, :]
35
  T = disp_map.shape[0]
36
  H = disp_map.shape[1]
37
  W = disp_map.shape[2]
@@ -111,19 +109,19 @@ def main(
111
 
112
  # Add base frame.
113
  frame_nodes.append(server.scene.add_frame(f"/frames/t{i}", show_axes=False))
114
-
115
  position_image = np.where(np.zeros([H, W]) == 0)
116
- v = np.array(position_image[0])
117
- u = np.array(position_image[1])
118
  d = disp_map[i, v, u]
119
 
120
  zc = 1.0 / (d + 0.1)
121
  # zc = 1.0 / (d + 1e-8)
122
-
123
- xc = zc * (u - (W / 2.0)) / (W/2.)
124
- yc = zc * (v - (H / 2.0)) / (H/2.)
125
 
126
- zc -= 4 # disp_max * 0.2
 
 
 
127
 
128
  points = np.stack((xc, yc, zc), axis=1)
129
  colors = vid[i, v, u]
@@ -136,7 +134,7 @@ def main(
136
  name=f"/frames/t{i}/point_cloud",
137
  points=points,
138
  colors=colors,
139
- point_size=point_size,#0.007,
140
  point_shape="rounded",
141
  )
142
 
@@ -154,13 +152,15 @@ def main(
154
 
155
 
156
  if __name__ == "__main__":
157
- tyro.cli(main(
158
- # dir path of saved rgb.mp4 and disp.npz, modify it to your own dir
159
- data_path="outputs/results_open_world/",
160
- # sample name, modify it to your own sample name
161
- vid_name="wukong",
162
- # downsample factor of dense pcd
163
- downsample_factor=8,
164
- # point cloud size
165
- point_size=0.007
166
- ))
 
 
 
1
  """Record3D visualizer
2
  """
3
+
4
  import time
 
5
  from decord import VideoReader, cpu
6
 
7
  import numpy as np
 
13
 
14
 
15
  def main(
16
+ data_path: str,
17
+ vid_name: str,
18
  downsample_factor: int = 8,
19
  max_frames: int = 100,
20
  share: bool = False,
21
+ point_size=0.01,
22
  ) -> None:
23
+
24
  server = viser.ViserServer()
25
  if share:
26
  server.request_share_url()
27
 
28
  print("Loading frames!")
29
+ dis_path = data_path + "/" + vid_name + ".npz"
30
+ vid_path = data_path + "/" + vid_name + "_input.mp4"
31
+
32
+ disp_map = np.load(dis_path)["depth"][:, :, :]
 
33
  T = disp_map.shape[0]
34
  H = disp_map.shape[1]
35
  W = disp_map.shape[2]
 
109
 
110
  # Add base frame.
111
  frame_nodes.append(server.scene.add_frame(f"/frames/t{i}", show_axes=False))
112
+
113
  position_image = np.where(np.zeros([H, W]) == 0)
114
+ v = np.array(position_image[0])
115
+ u = np.array(position_image[1])
116
  d = disp_map[i, v, u]
117
 
118
  zc = 1.0 / (d + 0.1)
119
  # zc = 1.0 / (d + 1e-8)
 
 
 
120
 
121
+ xc = zc * (u - (W / 2.0)) / (W / 2.0)
122
+ yc = zc * (v - (H / 2.0)) / (H / 2.0)
123
+
124
+ zc -= 4 # disp_max * 0.2
125
 
126
  points = np.stack((xc, yc, zc), axis=1)
127
  colors = vid[i, v, u]
 
134
  name=f"/frames/t{i}/point_cloud",
135
  points=points,
136
  colors=colors,
137
+ point_size=point_size, # 0.007,
138
  point_shape="rounded",
139
  )
140
 
 
152
 
153
 
154
  if __name__ == "__main__":
155
+ tyro.cli(
156
+ main(
157
+ # dir path of saved rgb.mp4 and disp.npz, modify it to your own dir
158
+ data_path="./demo_output",
159
+ # sample name, modify it to your own sample name
160
+ vid_name="example_01",
161
+ # downsample factor of dense pcd
162
+ downsample_factor=8,
163
+ # point cloud size
164
+ point_size=0.007,
165
+ )
166
+ )