Hi everyone,
I'm trying to figure out the easiest way to import a dataset created with the StrayScanner app for iOS (source code). I assume there is no way to use the GUI to import this data and that I would need to write my own code. Is that correct? Would I need to set up a Camera, or can I just create SensorData objects and write them directly to the database?

The dataset output includes:
$ tree .
.
├── camera_matrix.csv
├── confidence
│   ├── 000000.png
│   ├── 000001.png
│   ├── 000002.png
│   ├── 000003.png
│   └── ...
├── depth
│   ├── 000000.png
│   ├── 000001.png
│   ├── 000002.png
│   ├── 000003.png
│   └── ...
├── imu.csv
├── odometry.csv
└── rgb.mp4

$ head imu.csv
timestamp, a_x, a_y, a_z, alpha_x, alpha_y, alpha_z
15915.255230750001, 0.153580682259053, -8.252490030527115, -5.08627245604992, 0.22434598207473755, -0.028923140838742256, -0.029491707682609558
15915.26522975, 0.04655333831906319, -8.288565596938133, -4.97191115051508, 0.22057121992111206, -0.030368171632289886, -0.05559654161334038
15915.27522875, -0.015417846869677307, -8.319250615239143, -4.867127876579762, 0.2120652049779892, -0.03173523768782616, -0.08128055185079575

$ head odometry.csv
timestamp, frame, x, y, z, qx, qy, qz, qw
15915.154373916, 000000, -0.08409889, 0.11823461, -0.0065367892, 0.67582226, -0.6899462, 0.1505177, 0.21114671
15915.171033041, 000001, -0.085462295, 0.11920304, -0.006035801, 0.6761877, -0.6899391, 0.15041438, 0.21007104
15915.187692416, 000002, -0.08725144, 0.1201173, -0.0058480985, 0.67669433, -0.6894132, 0.15118878, 0.20960967
15915.20435175, 000003, -0.08921377, 0.1210426, -0.0057165325, 0.67727727, -0.6887213, 0.15215991, 0.20929849

$ cat camera_matrix.csv
1616.4161, 0.0, 946.6603
0.0, 1616.4161, 716.663
0.0, 0.0, 1.0
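For reference, a minimal sketch of reading one frame of this raw data in Python (assuming the depth PNGs are 16-bit grayscale in millimeters, which is how StrayVisualizer reads them; the recording path is a placeholder):

import numpy as np
import cv2
from PIL import Image

dataset = 'path/to/recording'  # placeholder path

# depth: 16-bit PNG, assumed to be millimeters; confidence: PNG with per-pixel values 0-2 (2 = best)
depth = np.array(Image.open(f'{dataset}/depth/000000.png'))
confidence = cv2.imread(f'{dataset}/confidence/000000.png')[:, :, 0]

depth_m = depth.astype(np.float32) / 1000.0  # millimeters -> meters
depth_m[confidence < 2] = 0.0                # keep only high-confidence pixels

# camera_matrix.csv holds the 3x3 intrinsics of the full-resolution RGB frames
K = np.loadtxt(f'{dataset}/camera_matrix.csv', delimiter=',')
print(depth_m.shape, K)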
Administrator
I took a look at how we can use the data. I saw in https://github.com/kekeblom/StrayVisualizer that they added a script to convert to open3d format (convert_to_open3d.py). I've modified that script to export all data in a format that rtabmap understands. Here is the script, which I called "convert_to_rtabmap.py":
import argparse
import os
import numpy as np
from ruamel.yaml import YAML
from ruamel.yaml.comments import CommentedSeq
import cv2
from skvideo import io
from PIL import Image
from stray_visualize import DEPTH_WIDTH, DEPTH_HEIGHT, _resize_camera_matrix
import pandas as pd
from scipy.spatial.transform import Rotation as R

FRAME_WIDTH = 1920
FRAME_HEIGHT = 1440
OUT_WIDTH = 640
OUT_HEIGHT = 480


def read_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', type=str)
    parser.add_argument('--out', type=str)
    parser.add_argument('--confidence', type=int, default=2)
    return parser.parse_args()


def write_frames(flags, rgb_out_dir):
    # Extract frames from rgb.mp4, resize them and save them as jpg
    rgb_video = os.path.join(flags.dataset, 'rgb.mp4')
    video = io.vreader(rgb_video)
    for i, frame in enumerate(video):
        print(f"Writing rgb frame {i:06}" + " " * 10, end='\r')
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        frame = cv2.resize(frame, (OUT_WIDTH, OUT_HEIGHT))
        frame_path = os.path.join(rgb_out_dir, f"{i:06}.jpg")
        params = [int(cv2.IMWRITE_JPEG_QUALITY), 90]
        cv2.imwrite(frame_path, frame, params)


def resize_depth(depth):
    out = cv2.resize(depth, (OUT_WIDTH, OUT_HEIGHT), interpolation=cv2.INTER_NEAREST_EXACT)
    out[out < 10] = 0
    return out


def write_depth(flags, depth_out_dir):
    depth_dir_in = os.path.join(flags.dataset, 'depth')
    confidence_dir = os.path.join(flags.dataset, 'confidence')
    files = sorted(os.listdir(depth_dir_in))
    for index, filename in enumerate(files):
        if not ('.npy' in filename or '.png' in filename):
            continue
        if index == len(files) - 1:
            # ignore the last file to match the number of color images
            break
        print(f"Writing depth frame {filename}", end='\r')
        number, _ = filename.split('.')
        confidence = cv2.imread(os.path.join(confidence_dir, number + '.png'))[:, :, 0]
        depth = np.array(Image.open(os.path.join(depth_dir_in, filename)))
        depth[confidence < flags.confidence] = 0
        depth = resize_depth(depth)
        cv2.imwrite(os.path.join(depth_out_dir, f'{number}.png'), depth)


def write_intrinsics(flags):
    yaml = YAML()
    yaml.width = 60
    intrinsics = np.loadtxt(os.path.join(flags.dataset, 'camera_matrix.csv'), delimiter=',')
    data = {}
    intrinsics_scaled = _resize_camera_matrix(intrinsics, OUT_WIDTH / FRAME_WIDTH, OUT_HEIGHT / FRAME_HEIGHT)
    data['camera_name'] = 'Stray Scanner dataset'
    data['image_width'] = OUT_WIDTH
    data['image_height'] = OUT_HEIGHT
    camera_matrix = {}
    camera_matrix['rows'] = 3
    camera_matrix['cols'] = 3
    camera_matrix['data'] = CommentedSeq([
        float(intrinsics_scaled[0, 0]), 0.0, float(intrinsics_scaled[0, 2]),
        0.0, float(intrinsics_scaled[1, 1]), float(intrinsics_scaled[1, 2]),
        0.0, 0.0, 1.0])
    data['camera_matrix'] = camera_matrix
    data['camera_matrix']['data'].fa.set_flow_style()
    with open(os.path.join(flags.out, 'camera_intrinsics.yaml'), 'wt') as f:
        f.write("%YAML:1.0\n---\n")  # compatibility with opencv's yaml loader
        yaml.dump(data, f)


def write_odometry(flags):
    # Reorganize the columns as below while converting poses from the OpenGL
    # to the ROS coordinate frame.
    # From:
    #   timestamp frame x y z qx qy qz qw
    # to:
    #   timestamp x y z qx qy qz qw frame
    df = pd.read_csv(os.path.join(flags.dataset, 'odometry.csv'), sep=r'[\s,]+', engine='python')
    # Extract position and orientation
    positions = df[['x', 'y', 'z']].values
    orientations = df[['qx', 'qy', 'qz', 'qw']].values
    Q = R.from_quat([0.5, -0.5, -0.5, 0.5])
    Q_inv = Q.inv()
    # This rotation has been added to the original pose, remove it
    # (https://github.com/strayrobots/scanner/blob/e0e252221048b80d288dc1f264df775aacbe4304/StrayScanner/Helpers/OdometryEncoder.swift#L37)
    q_AC = R.from_quat([1., 0., 0., 0.])
    q_AC_inv = q_AC.inv()
    # Apply Q * position (rotate each position by Q)
    rotated_positions = Q.apply(positions)
    # Apply Q * orientation * Q^-1 (rotate each orientation)
    rotated_orientations = []
    for q in orientations:
        rot = R.from_quat(q) * q_AC_inv
        new_rot = Q * rot * Q_inv
        rotated_orientations.append(new_rot.as_quat())
    rotated_orientations = np.array(rotated_orientations)
    df_out = df.copy()
    df_out[['x', 'y', 'z']] = rotated_positions
    df_out[['qx', 'qy', 'qz', 'qw']] = rotated_orientations
    # Move the "frame" column to the end
    cols = [col for col in df_out.columns if col != 'frame'] + ['frame']
    df_out = df_out[cols]
    # Ignore the last entry to match the number of color images
    df_out = df_out[:-1]
    # Save to file
    df_out.to_csv(os.path.join(flags.out, 'odometry.txt'), sep=' ', index=False, float_format='%.8f')
    df_out['timestamp'].to_csv(os.path.join(flags.out, 'timestamps.txt'), header=False, index=False, float_format='%.8f')


def write_imu(flags):
    # Convert imu data to the EuRoC dataset format:
    # From:
    #   timestamp(float), a_x, a_y, a_z, alpha_x, alpha_y, alpha_z
    # to:
    #   timestamp [ns], w_RS_S_x [rad s^-1], w_RS_S_y [rad s^-1], w_RS_S_z [rad s^-1],
    #   a_RS_S_x [m s^-2], a_RS_S_y [m s^-2], a_RS_S_z [m s^-2]
    df = pd.read_csv(os.path.join(flags.dataset, 'imu.csv'), sep=r'[\s,]+', engine='python')
    # Convert timestamps from seconds (float) to zero-padded nanoseconds
    timestamps = df['timestamp'].to_numpy()
    timestamps = timestamps * (10**9)
    df_out = df.copy()
    df_out['timestamp'] = np.array(timestamps).astype(np.int64)
    df_out['timestamp'] = df_out['timestamp'].astype(str).apply(lambda x: x.zfill(18))
    # Transform gyro (x->up, y->left, z->backward) in base frame (x->forward, y->left, z->up)
    df_out['alpha_x'] = df['alpha_z'].to_numpy() * -1
    df_out['alpha_z'] = df['alpha_x'].to_numpy()
    # Transform acc (x->down, y->right, z->forward) in base frame (x->forward, y->left, z->up)
    df_out['a_x'] = df['a_z'].to_numpy()
    df_out['a_y'] = df['a_y'].to_numpy() * -1
    df_out['a_z'] = df['a_x'].to_numpy() * -1
    # Save to file
    df_out = df_out[['timestamp', 'alpha_x', 'alpha_y', 'alpha_z', 'a_x', 'a_y', 'a_z']]
    df_out.to_csv(os.path.join(flags.out, 'imu.csv'), sep=',', index=False, float_format='%.16f')


def main():
    flags = read_args()
    rgb_out = os.path.join(flags.out, 'color/')
    depth_out = os.path.join(flags.out, 'depth/')
    os.makedirs(rgb_out, exist_ok=True)
    os.makedirs(depth_out, exist_ok=True)
    write_intrinsics(flags)
    write_odometry(flags)
    write_imu(flags)
    write_depth(flags, depth_out)
    write_frames(flags, rgb_out)
    print("\nDone.")


if __name__ == "__main__":
    main()
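As a quick sanity check after running the conversion (python convert_to_rtabmap.py --dataset <recording> --out <out>), the frame counts can be verified before loading anything into rtabmap. This is a hypothetical helper, not part of the script above:

import os

out = 'path/to/out'  # placeholder: the folder passed as --out
n_color = len([f for f in os.listdir(os.path.join(out, 'color')) if f.endswith('.jpg')])
n_depth = len([f for f in os.listdir(os.path.join(out, 'depth')) if f.endswith('.png')])
with open(os.path.join(out, 'odometry.txt')) as f:
    n_odom = sum(1 for _ in f) - 1  # minus the header row written by to_csv
with open(os.path.join(out, 'timestamps.txt')) as f:
    n_stamps = sum(1 for _ in f)
# rtabmap complains if these counts don't all match
assert n_color == n_depth == n_odom == n_stamps, (n_color, n_depth, n_odom, n_stamps)
print(f"OK: {n_color} synchronized frames")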
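For what it's worth, the axis re-ordering in write_imu() can equivalently be written as fixed rotation matrices from the device frames to the base frame. This is only a restatement of the swizzling above, and the caveat about other phone models still applies:

import numpy as np

# gyro device axes per the script's comment: x->up, y->left, z->backward
M_gyro = np.array([[0, 0, -1],
                   [0, 1,  0],
                   [1, 0,  0]], dtype=float)
# accelerometer device axes: x->down, y->right, z->forward
M_acc = np.array([[ 0,  0, 1],
                  [ 0, -1, 0],
                  [-1,  0, 0]], dtype=float)

# first sample of imu.csv shown in the question above
gyro_device = np.array([0.22434598, -0.02892314, -0.02949171])
acc_device = np.array([0.15358068, -8.25249003, -5.08627246])

print(M_gyro @ gyro_device)  # same as (-alpha_z, alpha_y, alpha_x)
print(M_acc @ acc_device)    # same as (a_z, -a_y, -a_x)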
For odometry.txt (TUM's RGB-D SLAM dataset format):

timestamp x y z qx qy qz qw frame
4645.65062379 -0.43445750 -0.88241106 0.39945477 -0.69191543 0.15994754 0.53701245 -0.45528831 0
4645.66728312 -0.43484810 -0.88243765 0.39953896 -0.69142839 0.16051550 0.53676923 -0.45611440 1
4645.68394250 -0.43498490 -0.88285834 0.39943823 -0.69118981 0.16050657 0.53664950 -0.45661976 2
4645.70060158 -0.43499184 -0.88358980 0.39917004 -0.69118694 0.15973206 0.53691112 -0.45658814 3

Note that the poses are transformed by the script into rtabmap's world coordinate frame (i.e., the ROS coordinate frame).

For timestamps.txt (we could have modified the script to output the color and depth images with the stamp already in their filenames, to avoid this extra file):

4645.65062379
4645.66728312
4645.68394250
4645.70060158
4645.71726100
4645.73392029

You will also see in the script that I remove the last odometry entry and the last depth image from the output folder to match the number of color images; otherwise rtabmap will complain about it.

Then, to use that data in rtabmap, open the Preferences dialog and set all those parameters with the corresponding files:

[Screenshots: Preferences > Source panel, pointing the image folders, calibration file, odometry file and IMU file to the converted output]

Note that the odometry and imu parameters are optional.

That dataset helped me improve how the imu and image sync is done in the standalone app; I put some fixes here that you may want to have.

cheers,
Mathieu
Hi Mathieu,
Thanks a lot. I had to rebuild with your changes, but now it seems to work well. Currently I am running out of memory when building the map, but I still need to try different settings and see whether I need to split the scan into multiple maps.

cheers
Povl