Spaces:

microsoft
/

VITRA

Running on Zero

App Files Files Community

VITRA / thirdparty /HaWoR /scripts /scripts_test_video /hawor_slam.py

arnoldland

Initial commit

aae3ba1 6 days ago

raw

history blame contribute delete

4.55 kB

	import math
	import sys
	import os

	from natsort import natsorted

	sys.path.insert(0, os.path.dirname(__file__) + '/../..')

	import argparse
	from tqdm import tqdm
	import numpy as np
	import torch
	import cv2
	from PIL import Image
	from glob import glob
	from pycocotools import mask as masktool
	from lib.pipeline.masked_droid_slam import *
	from lib.pipeline.est_scale import *
	from hawor.utils.process import block_print, enable_print

	sys.path.insert(0, os.path.dirname(__file__) + '/../../thirdparty/Metric3D')
	from metric import Metric3D


	def get_all_mp4_files(folder_path):
	    """Return every ``.mp4`` file found under ``folder_path``, at any depth.

	    Args:
	        folder_path: Directory to search; it is made absolute first, so the
	            returned paths are absolute as well.

	    Returns:
	        A list of paths in the order ``glob`` yields them (not sorted).
	    """
	    # Ensure the folder path is absolute
	    folder_path = os.path.abspath(folder_path)

	    # '**' together with recursive=True matches zero or more directory levels,
	    # so files directly inside folder_path and in any subfolder are found.
	    pattern = os.path.join(folder_path, '**', '*.mp4')
	    return glob(pattern, recursive=True)

	def split_list_by_interval(lst, interval=1000):
	    """Cut ``lst`` into consecutive chunks of at most ``interval`` items.

	    Args:
	        lst: Sequence to split (anything supporting ``len`` and slicing).
	        interval: Maximum chunk length; must be a positive integer.

	    Returns:
	        A tuple ``(start_indices, end_indices, split_lists)`` where chunk ``k``
	        is ``lst[start_indices[k]:end_indices[k]]``. End indices are exclusive
	        and the last one is clamped to ``len(lst)``. An empty ``lst`` yields
	        three empty lists.

	    Raises:
	        ValueError: If ``interval`` is not positive.
	    """
	    # A negative step would make range() empty and silently drop every item;
	    # a zero step fails inside range() with an unrelated-looking message.
	    if interval <= 0:
	        raise ValueError(f'interval must be a positive integer, got {interval}')

	    total = len(lst)
	    start_indices = list(range(0, total, interval))
	    end_indices = [min(start + interval, total) for start in start_indices]
	    split_lists = [lst[start:end] for start, end in zip(start_indices, end_indices)]

	    return start_indices, end_indices, split_lists

	def _load_or_init_focal(video_folder, default_focal=600):
	    """Return the focal length cached in ``est_focal.txt``, creating it if needed.

	    Reads ``<video_folder>/est_focal.txt`` and parses its content as a float.
	    If the file cannot be opened or does not contain a number, ``default_focal``
	    is written to that file and returned instead.
	    """
	    focal_path = os.path.join(video_folder, 'est_focal.txt')
	    try:
	        with open(focal_path, 'r') as focal_file:
	            return float(focal_file.read())
	    except (OSError, ValueError):
	        # Only a missing/unreadable file or unparsable content triggers the
	        # fallback; anything else (e.g. KeyboardInterrupt) propagates.
	        print('No focal length provided')
	        with open(focal_path, 'w') as focal_file:
	            focal_file.write(str(default_focal))
	        return default_focal


	def hawor_slam(args, start_idx, end_idx):
	    """Run masked SLAM on a video's extracted frames and save a scaled result.

	    Steps: load the masks for the chunk, resolve the focal length, run
	    ``run_slam``, predict depth with ``Metric3D`` for every SLAM keyframe,
	    estimate one scale per keyframe with ``est_scale_hybrid`` and store the
	    median together with the SLAM outputs.

	    Args:
	        args: Object providing ``video_path`` (path of the video file) and
	            ``img_focal`` (focal length, or ``None`` to use ``est_focal.txt``).
	        start_idx: Chunk start; used to locate
	            ``tracks_{start_idx}_{end_idx}/model_masks.npy`` and name the output.
	        end_idx: Chunk end; used the same way as ``start_idx``.

	    Side effects:
	        Creates ``<video_root>/<video>/`` and its ``SLAM`` subfolder, may write
	        ``est_focal.txt``, and writes
	        ``SLAM/hawor_slam_w_scale_{start_idx}_{end_idx}.npz`` with the keys
	        ``tstamp``, ``disps``, ``traj``, ``img_focal``, ``img_center``, ``scale``.

	    Raises:
	        IndexError: If no ``.jpg`` frame exists in ``extracted_images`` (same
	            exception type the bare ``imgfiles[0]`` lookup used to raise).
	        RuntimeError: If no keyframe produced a finite scale estimate.
	    """
	    # File and folders
	    video_path = args.video_path
	    video_root = os.path.dirname(video_path)
	    video = os.path.basename(video_path).split('.')[0]
	    seq_folder = os.path.join(video_root, video)
	    os.makedirs(seq_folder, exist_ok=True)
	    # Both names refer to the same per-video directory.
	    video_folder = seq_folder

	    img_folder = f'{video_folder}/extracted_images'
	    imgfiles = natsorted(glob(f'{img_folder}/*.jpg'))
	    if not imgfiles:
	        raise IndexError(f'No .jpg frames found in {img_folder}')

	    print(f'Running slam on {video_folder} ...')

	    ##### Run SLAM #####
	    # Use Masking
	    # NOTE(review): every frame in extracted_images is passed to SLAM while the
	    # masks come from the chunk folder; this presumably assumes the chunk spans
	    # the whole video -- confirm against the caller.
	    masks = np.load(f'{video_folder}/tracks_{start_idx}_{end_idx}/model_masks.npy', allow_pickle=True)
	    masks = torch.from_numpy(masks)
	    print(masks.shape)

	    # Camera calibration (intrinsics) for SLAM
	    focal = args.img_focal
	    if focal is None:
	        focal = _load_or_init_focal(video_folder)
	    calib = np.array(est_calib(imgfiles))  # [focal, focal, cx, cy]
	    calib[:2] = focal

	    # Droid-slam with masking
	    droid, traj = run_slam(imgfiles, masks=masks, calib=calib)
	    n = droid.video.counter.value
	    tstamp = droid.video.tstamp.cpu().int().numpy()[:n]
	    disps = droid.video.disps_up.cpu().numpy()[:n]
	    print('DBA errors:', droid.backend.errors)

	    # Drop the SLAM object before the depth model is created.
	    del droid
	    torch.cuda.empty_cache()

	    # Estimate scale
	    block_print()
	    try:
	        metric = Metric3D('thirdparty/Metric3D/weights/metric_depth_vit_large_800k.pth')
	    finally:
	        # Re-enable printing even when the model fails to load.
	        enable_print()
	    min_threshold = 0.4
	    max_threshold = 0.7
	    # Upper bound on threshold widenings per keyframe, so a keyframe that never
	    # yields a finite scale cannot loop forever.
	    max_widenings = 20

	    print('Predicting Metric Depth ...')
	    pred_depths = []
	    H, W = get_dimention(imgfiles)
	    for t in tqdm(tstamp):
	        pred_depth = metric(imgfiles[t], calib)
	        pred_depth = cv2.resize(pred_depth, (W, H))
	        pred_depths.append(pred_depth)

	    ##### Estimate Metric Scale #####
	    print('Estimating Metric Scale ...')
	    scales_ = []
	    n = len(tstamp)  # for each keyframe
	    for i in tqdm(range(n)):
	        t = tstamp[i]
	        disp = disps[i]
	        pred_depth = pred_depths[i]
	        slam_depth = 1/disp

	        # Estimate scene scale
	        msk = masks[t].numpy().astype(np.uint8)
	        near, far = min_threshold, max_threshold
	        scale = est_scale_hybrid(slam_depth, pred_depth, sigma=0.5, msk=msk, near_thresh=near, far_thresh=far)
	        widenings = 0
	        while math.isnan(scale) and widenings < max_widenings:
	            # Widen the near/far window and retry.
	            near -= 0.1
	            far += 0.1
	            widenings += 1
	            scale = est_scale_hybrid(slam_depth, pred_depth, sigma=0.5, msk=msk, near_thresh=near, far_thresh=far)
	        if math.isnan(scale):
	            # Give up on this keyframe and leave the thresholds untouched so a
	            # single bad frame does not affect the following ones.
	            print(f'Skipping keyframe {t}: scale is still NaN after {max_widenings} threshold widenings')
	            continue
	        # A successful widening carries over to the following keyframes, as in
	        # the original loop.
	        min_threshold, max_threshold = near, far
	        scales_.append(scale)

	    if not scales_:
	        raise RuntimeError('Could not estimate a finite metric scale for any keyframe')

	    median_s = np.median(scales_)
	    print(f"estimated scale: {median_s}")

	    # Save results
	    os.makedirs(f"{seq_folder}/SLAM", exist_ok=True)
	    save_path = f'{seq_folder}/SLAM/hawor_slam_w_scale_{start_idx}_{end_idx}.npz'
	    np.savez(save_path,
	             tstamp=tstamp, disps=disps, traj=traj,
	             img_focal=focal, img_center=calib[-2:],
	             scale=median_s)