sdsdsdadasd3 commited on
Commit
c186cfb
β€’
1 Parent(s): abfb3a3

[Add] Add scripts for preparing benchmark datasets.

Browse files
.gitignore CHANGED
@@ -166,4 +166,6 @@ cython_debug/
166
  /eval/*csv
167
  *__pycache__
168
  scripts/
169
- eval/
 
 
 
166
  /eval/*csv
167
  *__pycache__
168
  scripts/
169
+ eval/
170
+ *.DS_Store
171
+ benchmark/datasets
README.md CHANGED
@@ -27,6 +27,7 @@ arXiv preprint, 2024
27
 
28
  ## πŸ”† Introduction
29
 
 
30
  - [24-9-18] Add point cloud sequence visualization.
31
  - [24-9-14] πŸ”₯πŸ”₯πŸ”₯ **DepthCrafter** is released now, have fun!
32
 
 
27
 
28
  ## πŸ”† Introduction
29
 
30
+ - [24-9-19] Add scripts for preparing benchmark datasets.
31
  - [24-9-18] Add point cloud sequence visualization.
32
  - [24-9-14] πŸ”₯πŸ”₯πŸ”₯ **DepthCrafter** is released now, have fun!
33
 
benchmark/dataset_extract_bonn.py ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import numpy as np
3
+ import os.path as osp
4
+ from PIL import Image
5
+ from tqdm import tqdm
6
+ import imageio
7
+ import csv
8
+
9
+
10
+ def depth_read(filename):
11
+ # loads depth map D from png file
12
+ # and returns it as a numpy array
13
+
14
+ depth_png = np.asarray(Image.open(filename))
15
+ # make sure we have a proper 16bit depth map here.. not 8bit!
16
+ assert np.max(depth_png) > 255
17
+
18
+ depth = depth_png.astype(np.float64) / 5000.0
19
+ depth[depth_png == 0] = -1.0
20
+ return depth
21
+
22
+
23
def extract_bonn(
    root,
    depth_root,
    sample_len=-1,
    csv_save_path="",
    datatset_name="",
    saved_rgb_dir="",
    saved_disp_dir="",
    start_frame=0,
    end_frame=110,
):
    """Convert Bonn RGB-D sequences into RGB mp4 clips and depth .npz files.

    For every scene directory under ``depth_root`` the frames in
    ``[start_frame, end_frame)`` are split into windows of ``sample_len``
    frames (the whole sequence when ``sample_len <= 0``); each window is
    written as ``<seq>_rgb_left.mp4`` plus ``<seq>_disparity.npz`` and an
    index CSV is saved to ``csv_save_path``.
    """

    def _even(num):
        # mp4 encoders require even frame dimensions; crop one pixel if odd.
        return num if num % 2 == 0 else num - 1

    scenes_names = os.listdir(depth_root)
    all_samples = []
    for seq_name in tqdm(scenes_names):
        # Frame names are not zero-padded, so sort numerically by the last
        # four digits of the stem rather than lexicographically.
        all_img_names = os.listdir(osp.join(depth_root, seq_name, "rgb"))
        all_img_names = [x for x in all_img_names if x.endswith(".png")]
        print(f"sequence frame number: {len(all_img_names)}")
        all_img_names = sorted(all_img_names, key=lambda x: int(x.split(".")[0][-4:]))
        all_img_names = all_img_names[start_frame:end_frame]

        all_depth_names = os.listdir(osp.join(depth_root, seq_name, "depth"))
        all_depth_names = [x for x in all_depth_names if x.endswith(".png")]
        print(f"sequence depth number: {len(all_depth_names)}")
        all_depth_names = sorted(
            all_depth_names, key=lambda x: int(x.split(".")[0][-4:])
        )
        all_depth_names = all_depth_names[start_frame:end_frame]

        seq_len = len(all_img_names)
        step = sample_len if sample_len > 0 else seq_len

        for ref_idx in range(0, seq_len, step):
            print(f"Progress: {seq_name}, {ref_idx // step + 1} / {seq_len//step}")

            # Skip the trailing window when it would be shorter than `step`.
            if (ref_idx + step) > seq_len:
                continue
            ref_e = ref_idx + step

            video_imgs = []
            video_depths = []
            for idx in range(ref_idx, ref_e):
                im_path = osp.join(root, seq_name, "rgb", all_img_names[idx])
                depth_path = osp.join(
                    depth_root, seq_name, "depth", all_depth_names[idx]
                )
                video_depths.append(depth_read(depth_path))
                video_imgs.append(np.array(Image.open(im_path)))

            disp_video = np.array(video_depths)[:, None]  # (T, 1, H, W)
            img_video = np.array(video_imgs)[..., 0:3]  # (T, H, W, 3)

            print(disp_video.max(), disp_video.min())

            height = _even(disp_video.shape[-2])
            width = _even(disp_video.shape[-1])
            disp_video = disp_video[:, :, 0:height, 0:width]
            img_video = img_video[:, 0:height, 0:width]

            data_root = saved_rgb_dir + datatset_name
            disp_root = saved_disp_dir + datatset_name
            os.makedirs(data_root, exist_ok=True)
            os.makedirs(disp_root, exist_ok=True)

            img_video_path = os.path.join(data_root, f"{seq_name}_rgb_left.mp4")
            disp_video_path = os.path.join(disp_root, f"{seq_name}_disparity.npz")

            imageio.mimsave(
                img_video_path, img_video, fps=15, quality=9, macro_block_size=1
            )
            np.savez(disp_video_path, disparity=disp_video)

            # CSV paths are relative to the dataset root, not absolute.
            all_samples.append(
                {
                    "filepath_left": f"{datatset_name}/{seq_name}_rgb_left.mp4",
                    "filepath_disparity": f"{datatset_name}/{seq_name}_disparity.npz",
                }
            )

    # newline="" per csv module docs: prevents blank rows on Windows.
    os.makedirs(os.path.dirname(csv_save_path), exist_ok=True)
    fields = ["filepath_left", "filepath_disparity"]
    with open(csv_save_path, "w", newline="") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fields)
        writer.writeheader()
        writer.writerows(all_samples)

    print(f"{csv_save_path} has been saved.")
142
+
143
+
144
if __name__ == "__main__":
    # Placeholder paths: point `root`/`depth_root` at your local Bonn RGB-D copy.
    extract_bonn(
        root="path/to/Bonn-RGBD",
        depth_root="path/to/Bonn-RGBD",
        saved_rgb_dir="./benchmark/datasets/",
        saved_disp_dir="./benchmark/datasets/",
        csv_save_path="./benchmark/datasets/bonn.csv",
        sample_len=-1,
        datatset_name="bonn",
        start_frame=30,
        end_frame=140,
    )
benchmark/dataset_extract_kitti.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import numpy as np
3
+ import os.path as osp
4
+ from PIL import Image
5
+ from tqdm import tqdm
6
+ import csv
7
+ import imageio
8
+
9
+
10
def depth_read(filename):
    """Load a KITTI ground-truth depth map from a 16-bit PNG file.

    Returns depth in meters as a float64 numpy array; pixels with no
    ground truth (raw value 0) are set to -1.0.
    """
    depth_png = np.array(Image.open(filename), dtype=int)
    # KITTI depth is stored as 16-bit PNG; an 8-bit file indicates a wrong input.
    # Raise instead of assert so the check survives `python -O`.
    if np.max(depth_png) <= 255:
        raise ValueError(
            f"{filename}: expected 16-bit depth PNG, got 8-bit-range values"
        )

    depth = depth_png.astype(np.float64) / 256.0  # KITTI depth scale factor
    depth[depth_png == 0] = -1.0  # mark pixels without ground truth
    return depth
21
+
22
+
23
def extract_kitti(
    root,
    depth_root,
    sample_len=-1,
    csv_save_path="",
    datatset_name="",
    saved_rgb_dir="",
    saved_disp_dir="",
    start_frame=0,
    end_frame=110,
):
    """Convert KITTI depth sequences into RGB mp4 clips and depth .npz files.

    Reads annotated depth from ``depth_root/<seq>/proj_depth/groundtruth/image_02``
    and the matching raw RGB frames from ``root/<date>/<seq>/image_02/data``,
    windows frames ``[start_frame, end_frame)`` by ``sample_len`` (whole
    sequence when ``sample_len <= 0``), writes mp4/npz pairs, and saves an
    index CSV to ``csv_save_path``.
    """

    def _even(num):
        # mp4 encoders require even frame dimensions; crop one pixel if odd.
        return num if num % 2 == 0 else num - 1

    scenes_names = os.listdir(depth_root)
    all_samples = []
    for seq_name in tqdm(scenes_names):
        all_img_names = os.listdir(
            osp.join(depth_root, seq_name, "proj_depth/groundtruth/image_02")
        )
        all_img_names = [x for x in all_img_names if x.endswith(".png")]
        print(f"sequence frame number: {len(all_img_names)}")

        # Sort numerically by the last four digits of the stem.
        all_img_names = sorted(all_img_names, key=lambda x: int(x.split(".")[0][-4:]))
        all_img_names = all_img_names[start_frame:end_frame]

        seq_len = len(all_img_names)
        step = sample_len if sample_len > 0 else seq_len

        for ref_idx in range(0, seq_len, step):
            print(f"Progress: {seq_name}, {ref_idx // step + 1} / {seq_len//step}")

            # Skip the trailing window when it would be shorter than `step`.
            if (ref_idx + step) > seq_len:
                continue
            ref_e = ref_idx + step

            video_imgs = []
            video_depths = []
            for idx in range(ref_idx, ref_e):
                # seq_name starts with the drive date (e.g. 2011_09_26_...),
                # which is the parent directory in the raw-data layout.
                im_path = osp.join(
                    root, seq_name[0:10], seq_name, "image_02/data", all_img_names[idx]
                )
                depth_path = osp.join(
                    depth_root,
                    seq_name,
                    "proj_depth/groundtruth/image_02",
                    all_img_names[idx],
                )
                video_depths.append(depth_read(depth_path))
                video_imgs.append(np.array(Image.open(im_path)))

            disp_video = np.array(video_depths)[:, None]  # (T, 1, H, W)
            img_video = np.array(video_imgs)[..., 0:3]  # (T, H, W, 3)

            height = _even(disp_video.shape[-2])
            width = _even(disp_video.shape[-1])
            disp_video = disp_video[:, :, 0:height, 0:width]
            img_video = img_video[:, 0:height, 0:width]

            data_root = saved_rgb_dir + datatset_name
            disp_root = saved_disp_dir + datatset_name
            os.makedirs(data_root, exist_ok=True)
            os.makedirs(disp_root, exist_ok=True)

            img_video_path = os.path.join(data_root, f"{seq_name}_rgb_left.mp4")
            disp_video_path = os.path.join(disp_root, f"{seq_name}_disparity.npz")

            imageio.mimsave(
                img_video_path, img_video, fps=15, quality=10, macro_block_size=1
            )
            np.savez(disp_video_path, disparity=disp_video)

            # Use datatset_name (the directory actually written to) rather
            # than a hardcoded "KITTI" so the CSV stays consistent with the
            # output layout; the shipped caller passes datatset_name="KITTI".
            all_samples.append(
                {
                    "filepath_left": f"{datatset_name}/{seq_name}_rgb_left.mp4",
                    "filepath_disparity": f"{datatset_name}/{seq_name}_disparity.npz",
                }
            )

    # newline="" per csv module docs: prevents blank rows on Windows.
    os.makedirs(os.path.dirname(csv_save_path), exist_ok=True)
    fields = ["filepath_left", "filepath_disparity"]
    with open(csv_save_path, "w", newline="") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fields)
        writer.writeheader()
        writer.writerows(all_samples)

    print(f"{csv_save_path} has been saved.")
127
+
128
+
129
if __name__ == "__main__":
    # Placeholder paths: point them at your local KITTI raw + depth_annotated copy.
    extract_kitti(
        root="path/to/KITTI/raw_data",
        depth_root="path/to/KITTI/data_depth_annotated/val",
        saved_rgb_dir="./benchmark/datasets/",
        saved_disp_dir="./benchmark/datasets/",
        csv_save_path="./benchmark/datasets/KITTI.csv",
        sample_len=-1,
        datatset_name="KITTI",
        start_frame=0,
        end_frame=110,
    )
benchmark/dataset_extract_nyu.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import numpy as np
3
+ import os.path as osp
4
+ from PIL import Image
5
+ from tqdm import tqdm
6
+ import csv
7
+ import imageio
8
+
9
+
10
def _read_image(img_rel_path) -> np.ndarray:
    """Load the image at *img_rel_path* and return its pixels as a numpy array."""
    return np.asarray(Image.open(img_rel_path))
15
+
16
+
17
def depth_read(filename):
    """Read a depth image and convert its values from millimeters to meters."""
    return _read_image(filename) / 1000.0
21
+
22
+
23
def extract_nyu(
    root,
    depth_root,
    csv_save_path="",
    datatset_name="",
    filename_ls_path="",
    saved_rgb_dir="",
    saved_disp_dir="",
):
    """Convert NYUv2 test frames into single-frame mp4 clips and depth .npz files.

    ``filename_ls_path`` lists one sample per line; column 0 is the RGB path
    relative to ``root`` and column 2 the filled-depth path relative to
    ``depth_root``. Each frame becomes a one-frame mp4 plus an npz, and an
    index CSV is written to ``csv_save_path``.
    """
    with open(filename_ls_path, "r") as f:
        filenames = [s.split() for s in f.readlines()]

    all_samples = []
    for pair_names in tqdm(filenames):
        img_name = pair_names[0]
        filled_depth_name = pair_names[2]

        im_path = osp.join(root, img_name)
        depth_path = osp.join(depth_root, filled_depth_name)

        # Single-frame "video": depth (1, 1, H, W), rgb (1, H, W, 3).
        disp_video = np.array([depth_read(depth_path)])[:, None]
        img_video = np.array([np.array(Image.open(im_path))])[..., 0:3]

        # Crop the white projection border of NYUv2 frames.
        disp_video = disp_video[:, :, 45:471, 41:601]
        img_video = img_video[:, 45:471, 41:601, :]

        data_root = saved_rgb_dir + datatset_name
        disp_root = saved_disp_dir + datatset_name
        os.makedirs(data_root, exist_ok=True)
        os.makedirs(disp_root, exist_ok=True)

        img_video_path = os.path.join(data_root, f"{img_name[:-4]}_rgb_left.mp4")
        disp_video_path = os.path.join(disp_root, f"{img_name[:-4]}_disparity.npz")

        # img_name may contain subdirectories (e.g. scene/frame.png).
        os.makedirs(os.path.dirname(img_video_path), exist_ok=True)
        os.makedirs(os.path.dirname(disp_video_path), exist_ok=True)

        imageio.mimsave(
            img_video_path, img_video, fps=15, quality=10, macro_block_size=1
        )
        np.savez(disp_video_path, disparity=disp_video)

        # CSV paths are relative to the dataset root, not absolute.
        all_samples.append(
            {
                "filepath_left": f"{datatset_name}/{img_name[:-4]}_rgb_left.mp4",
                "filepath_disparity": f"{datatset_name}/{img_name[:-4]}_disparity.npz",
            }
        )

    # newline="" per csv module docs: prevents blank rows on Windows.
    os.makedirs(os.path.dirname(csv_save_path), exist_ok=True)
    fields = ["filepath_left", "filepath_disparity"]
    with open(csv_save_path, "w", newline="") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fields)
        writer.writeheader()
        writer.writerows(all_samples)

    print(f"{csv_save_path} has been saved.")
95
+
96
+
97
if __name__ == "__main__":
    # Placeholder paths: point them at your local NYUv2 copy.
    extract_nyu(
        root="path/to/NYUv2/",
        depth_root="path/to/NYUv2/",
        filename_ls_path="path/to/NYUv2/filename_list_test.txt",
        saved_rgb_dir="./benchmark/datasets/",
        saved_disp_dir="./benchmark/datasets/",
        csv_save_path="./benchmark/datasets/NYUv2.csv",
        datatset_name="NYUv2",
    )
benchmark/dataset_extract_scannet.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import numpy as np
3
+ import os.path as osp
4
+ from PIL import Image
5
+ from tqdm import tqdm
6
+ import csv
7
+ import imageio
8
+
9
+
10
def _read_image(img_rel_path) -> np.ndarray:
    """Load the image at *img_rel_path* and return it as a numpy array [H, W, rgb]."""
    return np.asarray(Image.open(img_rel_path))
15
+
16
+
17
def depth_read(filename):
    """Read a depth image and convert its values from millimeters to meters."""
    return _read_image(filename) / 1000.0
21
+
22
+
23
def extract_scannet(
    root,
    sample_len=-1,
    csv_save_path="",
    datatset_name="",
    scene_number=16,
    scene_frames_len=120,
    stride=1,
    saved_rgb_dir="",
    saved_disp_dir="",
):
    """Convert ScanNet scans into RGB mp4 clips and depth .npz files.

    Takes the first ``scene_number`` scenes under ``root`` (sorted by name),
    keeps every ``stride``-th of the first ``scene_frames_len`` frames,
    windows them by ``sample_len`` (whole sequence when ``sample_len <= 0``),
    writes mp4/npz pairs, and saves an index CSV to ``csv_save_path``.
    """
    scenes_names = sorted(os.listdir(root))[:scene_number]
    all_samples = []
    for seq_name in tqdm(scenes_names):
        all_img_names = os.listdir(osp.join(root, seq_name, "color"))
        all_img_names = [x for x in all_img_names if x.endswith(".jpg")]
        # Frame stems are plain integers, so sort numerically.
        all_img_names = sorted(all_img_names, key=lambda x: int(x.split(".")[0]))
        all_img_names = all_img_names[:scene_frames_len:stride]
        print(f"sequence frame number: {len(all_img_names)}")

        seq_len = len(all_img_names)
        step = sample_len if sample_len > 0 else seq_len

        for ref_idx in range(0, seq_len, step):
            print(f"Progress: {seq_name}, {ref_idx // step + 1} / {seq_len//step}")

            # Skip the trailing window when it would be shorter than `step`.
            if (ref_idx + step) > seq_len:
                continue
            ref_e = ref_idx + step

            video_imgs = []
            video_depths = []
            for idx in range(ref_idx, ref_e):
                im_path = osp.join(root, seq_name, "color", all_img_names[idx])
                # Depth frames share the stem but are PNG instead of JPG.
                depth_path = osp.join(
                    root, seq_name, "depth", all_img_names[idx][:-3] + "png"
                )
                video_depths.append(depth_read(depth_path))
                video_imgs.append(np.array(Image.open(im_path)))

            disp_video = np.array(video_depths)[:, None]  # (T, 1, H, W)
            img_video = np.array(video_imgs)[..., 0:3]  # (T, H, W, 3)

            # Crop the border region of ScanNet frames.
            disp_video = disp_video[:, :, 8:-8, 11:-11]
            img_video = img_video[:, 8:-8, 11:-11, :]

            data_root = saved_rgb_dir + datatset_name
            disp_root = saved_disp_dir + datatset_name
            os.makedirs(data_root, exist_ok=True)
            os.makedirs(disp_root, exist_ok=True)

            img_video_path = os.path.join(data_root, f"{seq_name}_rgb_left.mp4")
            disp_video_path = os.path.join(disp_root, f"{seq_name}_disparity.npz")

            imageio.mimsave(
                img_video_path, img_video, fps=15, quality=9, macro_block_size=1
            )
            np.savez(disp_video_path, disparity=disp_video)

            # CSV paths are relative to the dataset root, not absolute.
            all_samples.append(
                {
                    "filepath_left": f"{datatset_name}/{seq_name}_rgb_left.mp4",
                    "filepath_disparity": f"{datatset_name}/{seq_name}_disparity.npz",
                }
            )

    # newline="" per csv module docs: prevents blank rows on Windows.
    os.makedirs(os.path.dirname(csv_save_path), exist_ok=True)
    fields = ["filepath_left", "filepath_disparity"]
    with open(csv_save_path, "w", newline="") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fields)
        writer.writeheader()
        writer.writerows(all_samples)

    print(f"{csv_save_path} has been saved.")
111
+
112
+
113
if __name__ == "__main__":
    # Placeholder paths: point `root` at your local ScanNet v2 test scans.
    extract_scannet(
        root="path/to/ScanNet_v2/raw/scans_test",
        saved_rgb_dir="./benchmark/datasets/",
        saved_disp_dir="./benchmark/datasets/",
        csv_save_path="./benchmark/datasets/scannet.csv",
        sample_len=-1,
        datatset_name="scannet",
        scene_number=100,
        scene_frames_len=90 * 3,
        stride=3,
    )
benchmark/dataset_extract_sintel.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+
4
+ # This source code is licensed under the license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+ # # Data loading based on https://github.com/NVIDIA/flownet2-pytorch
7
+
8
+
9
+ import os
10
+ import numpy as np
11
+ import os.path as osp
12
+ from PIL import Image
13
+ from tqdm import tqdm
14
+ import csv
15
+ import imageio
16
+
17
+
18
# Check for endianness, based on Daniel Scharstein's optical flow code.
# Using little-endian architecture, these two should be equal.
TAG_FLOAT = 202021.25
TAG_CHAR = "PIEH"


def depth_read(filename):
    """Read depth data from a Sintel .dpt file, return as a (H, W) float32 array.

    File layout: float32 magic tag, int32 width, int32 height, then
    width*height float32 depth values. Raises ValueError on a wrong magic
    tag or implausible dimensions (raise instead of assert so the checks
    survive `python -O`; the file is also closed via `with`, fixing the
    original handle leak).
    """
    with open(filename, "rb") as f:
        check = np.fromfile(f, dtype=np.float32, count=1)[0]
        if check != TAG_FLOAT:
            raise ValueError(
                " depth_read:: Wrong tag in flow file (should be: {0}, is: {1}). Big-endian machine? ".format(
                    TAG_FLOAT, check
                )
            )
        width = np.fromfile(f, dtype=np.int32, count=1)[0]
        height = np.fromfile(f, dtype=np.int32, count=1)[0]
        size = width * height
        if not (width > 0 and height > 0 and 1 < size < 100000000):
            raise ValueError(
                " depth_read:: Wrong input size (width = {0}, height = {1}).".format(
                    width, height
                )
            )
        depth = np.fromfile(f, dtype=np.float32, count=-1).reshape((height, width))
    return depth
43
+
44
+
45
def extract_sintel(
    root,
    depth_root,
    sample_len=-1,
    csv_save_path="",
    datatset_name="",
    saved_rgb_dir="",
    saved_disp_dir="",
):
    """Convert MPI-Sintel sequences into RGB mp4 clips and depth .npz files.

    For every scene under ``root`` the PNG frames are paired with the .dpt
    depth files under ``depth_root``, windowed by ``sample_len`` frames
    (whole sequence when ``sample_len <= 0``), written as mp4/npz pairs, and
    indexed in a CSV saved to ``csv_save_path``.
    """
    scenes_names = os.listdir(root)
    all_samples = []
    for seq_name in tqdm(scenes_names):
        all_img_names = os.listdir(os.path.join(root, seq_name))
        all_img_names = [x for x in all_img_names if x.endswith(".png")]
        # Sort numerically by the last four digits of the stem.
        all_img_names = sorted(all_img_names, key=lambda x: int(x.split(".")[0][-4:]))

        seq_len = len(all_img_names)
        step = sample_len if sample_len > 0 else seq_len

        for ref_idx in range(0, seq_len, step):
            print(f"Progress: {seq_name}, {ref_idx // step} / {seq_len // step}")

            # Skip the trailing window when it would be shorter than `step`.
            if (ref_idx + step) > seq_len:
                continue
            ref_e = ref_idx + step

            video_imgs = []
            video_depths = []
            for idx in range(ref_idx, ref_e):
                im_path = osp.join(root, seq_name, all_img_names[idx])
                # Depth files share the frame stem with a .dpt extension.
                depth_path = osp.join(
                    depth_root, seq_name, all_img_names[idx][:-3] + "dpt"
                )
                video_depths.append(depth_read(depth_path))
                video_imgs.append(np.array(Image.open(im_path)))

            disp_video = np.array(video_depths)[:, None]  # (T, 1, H, W)
            img_video = np.array(video_imgs)[..., 0:3]  # (T, H, W, 3)

            data_root = saved_rgb_dir + datatset_name
            disp_root = saved_disp_dir + datatset_name
            os.makedirs(data_root, exist_ok=True)
            os.makedirs(disp_root, exist_ok=True)

            img_video_path = os.path.join(data_root, f"{seq_name}_rgb_left.mp4")
            disp_video_path = os.path.join(disp_root, f"{seq_name}_disparity.npz")

            imageio.mimsave(
                img_video_path, img_video, fps=15, quality=10, macro_block_size=1
            )
            np.savez(disp_video_path, disparity=disp_video)

            # CSV paths are relative to the dataset root, not absolute.
            all_samples.append(
                {
                    "filepath_left": f"{datatset_name}/{seq_name}_rgb_left.mp4",
                    "filepath_disparity": f"{datatset_name}/{seq_name}_disparity.npz",
                }
            )

    # newline="" per csv module docs: prevents blank rows on Windows.
    os.makedirs(os.path.dirname(csv_save_path), exist_ok=True)
    fields = ["filepath_left", "filepath_disparity"]
    with open(csv_save_path, "w", newline="") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fields)
        writer.writeheader()
        writer.writerows(all_samples)

    print(f"{csv_save_path} has been saved.")
126
+
127
+
128
if __name__ == "__main__":
    # Placeholder paths: point them at your local MPI-Sintel depth training set.
    extract_sintel(
        root="path/to/Sintel-Depth/training_image/clean",
        depth_root="path/to/Sintel-Depth/MPI-Sintel-depth-training-20150305/training/depth",
        saved_rgb_dir="./benchmark/datasets/",
        saved_disp_dir="./benchmark/datasets/",
        csv_save_path="./benchmark/datasets/sintel.csv",
        sample_len=-1,
        datatset_name="sintel",
    )
visualization_pcd.py β†’ visualization/visualization_pcd.py RENAMED
@@ -1,8 +1,7 @@
1
  """Record3D visualizer
2
  """
3
- import os
4
  import time
5
- from pathlib import Path
6
  from decord import VideoReader, cpu
7
 
8
  import numpy as np
@@ -14,24 +13,23 @@ from tqdm.auto import tqdm
14
 
15
 
16
  def main(
17
- data_path: str = "/apdcephfs_cq10/share_1290939/vg_share/reynli/cache/video_demo/video_depth",
18
- vid_name: str = "01_dog",
19
  downsample_factor: int = 8,
20
  max_frames: int = 100,
21
  share: bool = False,
22
- point_size = 0.01
23
  ) -> None:
24
-
25
  server = viser.ViserServer()
26
  if share:
27
  server.request_share_url()
28
 
29
  print("Loading frames!")
30
- dis_path = data_path + '/' + vid_name + '.npz'
31
- vid_path = data_path + '/' + vid_name + '_rgb.mp4'
32
- # vid_path = data_path + '/' + vid_name + '.mp4'
33
-
34
- disp_map = np.load(dis_path)['depth'][:, :, :]
35
  T = disp_map.shape[0]
36
  H = disp_map.shape[1]
37
  W = disp_map.shape[2]
@@ -111,19 +109,19 @@ def main(
111
 
112
  # Add base frame.
113
  frame_nodes.append(server.scene.add_frame(f"/frames/t{i}", show_axes=False))
114
-
115
  position_image = np.where(np.zeros([H, W]) == 0)
116
- v = np.array(position_image[0])
117
- u = np.array(position_image[1])
118
  d = disp_map[i, v, u]
119
 
120
  zc = 1.0 / (d + 0.1)
121
  # zc = 1.0 / (d + 1e-8)
122
-
123
- xc = zc * (u - (W / 2.0)) / (W/2.)
124
- yc = zc * (v - (H / 2.0)) / (H/2.)
125
 
126
- zc -= 4 # disp_max * 0.2
 
 
 
127
 
128
  points = np.stack((xc, yc, zc), axis=1)
129
  colors = vid[i, v, u]
@@ -136,7 +134,7 @@ def main(
136
  name=f"/frames/t{i}/point_cloud",
137
  points=points,
138
  colors=colors,
139
- point_size=point_size,#0.007,
140
  point_shape="rounded",
141
  )
142
 
@@ -154,13 +152,15 @@ def main(
154
 
155
 
156
  if __name__ == "__main__":
157
- tyro.cli(main(
158
- # dir path of saved rgb.mp4 and disp.npz, modify it to your own dir
159
- data_path="outputs/results_open_world/",
160
- # sample name, modify it to your own sample name
161
- vid_name="wukong",
162
- # downsample factor of dense pcd
163
- downsample_factor=8,
164
- # point cloud size
165
- point_size=0.007
166
- ))
 
 
 
1
  """Record3D visualizer
2
  """
3
+
4
  import time
 
5
  from decord import VideoReader, cpu
6
 
7
  import numpy as np
 
13
 
14
 
15
  def main(
16
+ data_path: str,
17
+ vid_name: str,
18
  downsample_factor: int = 8,
19
  max_frames: int = 100,
20
  share: bool = False,
21
+ point_size=0.01,
22
  ) -> None:
23
+
24
  server = viser.ViserServer()
25
  if share:
26
  server.request_share_url()
27
 
28
  print("Loading frames!")
29
+ dis_path = data_path + "/" + vid_name + ".npz"
30
+ vid_path = data_path + "/" + vid_name + "_input.mp4"
31
+
32
+ disp_map = np.load(dis_path)["depth"][:, :, :]
 
33
  T = disp_map.shape[0]
34
  H = disp_map.shape[1]
35
  W = disp_map.shape[2]
 
109
 
110
  # Add base frame.
111
  frame_nodes.append(server.scene.add_frame(f"/frames/t{i}", show_axes=False))
112
+
113
  position_image = np.where(np.zeros([H, W]) == 0)
114
+ v = np.array(position_image[0])
115
+ u = np.array(position_image[1])
116
  d = disp_map[i, v, u]
117
 
118
  zc = 1.0 / (d + 0.1)
119
  # zc = 1.0 / (d + 1e-8)
 
 
 
120
 
121
+ xc = zc * (u - (W / 2.0)) / (W / 2.0)
122
+ yc = zc * (v - (H / 2.0)) / (H / 2.0)
123
+
124
+ zc -= 4 # disp_max * 0.2
125
 
126
  points = np.stack((xc, yc, zc), axis=1)
127
  colors = vid[i, v, u]
 
134
  name=f"/frames/t{i}/point_cloud",
135
  points=points,
136
  colors=colors,
137
+ point_size=point_size, # 0.007,
138
  point_shape="rounded",
139
  )
140
 
 
152
 
153
 
154
  if __name__ == "__main__":
155
+ tyro.cli(
156
+ main(
157
+ # dir path of saved rgb.mp4 and disp.npz, modify it to your own dir
158
+ data_path="./demo_output",
159
+ # sample name, modify it to your own sample name
160
+ vid_name="example_01",
161
+ # downsample factor of dense pcd
162
+ downsample_factor=8,
163
+ # point cloud size
164
+ point_size=0.007,
165
+ )
166
+ )