diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 00000000..e92cf9b5
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1 @@
+*.pt filter=lfs diff=lfs merge=lfs -text
diff --git a/config/nerian_learned_features_extras.yaml b/config/nerian_learned_features_extras.yaml
index 2d18c852..56922c01 100644
--- a/config/nerian_learned_features_extras.yaml
+++ b/config/nerian_learned_features_extras.yaml
@@ -2,6 +2,7 @@
   ros__parameters:
     log_enabled:
       - stereo.learned_features
+      # - tactic.module
 
     ############ tactic configuration ############
     tactic:
@@ -32,8 +33,7 @@
     learned:
       # we're providing the surf settings (don't change this param, use a different file)
      type: "LEARNED_FEATURE"
-      modelPath: "/home/alec/ASRL/vtr3/models/unsup_unet_3.pt"
-
+      modelPath: "${VTRROOT}/models/weights.pt"
 
      stereoDisparityMinimum: 0.1
      stereoDisparityMaximum: 100.0
diff --git a/main/src/vtr_vision/CMakeLists.txt b/main/src/vtr_vision/CMakeLists.txt
index a9af9316..3b86b840 100644
--- a/main/src/vtr_vision/CMakeLists.txt
+++ b/main/src/vtr_vision/CMakeLists.txt
@@ -5,6 +5,8 @@ project(vtr_vision)
 #   add_compile_options(-Wall -Wextra -Wpedantic)
 # endif()
 
+#Add debug symbols
+# add_compile_options(-g -Og)
 
 # Common setup for vtr packages
diff --git a/main/src/vtr_vision/src/features/extractor/base_feature_extractor.cpp b/main/src/vtr_vision/src/features/extractor/base_feature_extractor.cpp
index 7f948a45..138bba8f 100644
--- a/main/src/vtr_vision/src/features/extractor/base_feature_extractor.cpp
+++ b/main/src/vtr_vision/src/features/extractor/base_feature_extractor.cpp
@@ -170,9 +170,10 @@ ChannelFeatures BFE::extractChannelFeatures(const ChannelImages &channel,
 ChannelFeatures BFE::extractChannelFeaturesDisp(
     const ChannelImages &channel, const ChannelImages &channel_disp,
     bool fully_matched = false) {
-  if (fully_matched && channel.cameras.size() == 2)
-    // return extractStereoFeaturesDisp(channel, channel_disp);
+  if (fully_matched && channel.cameras.size() == 2){
+    // CLOG(DEBUG, "stereo.learned_features") << "made it here";
     return extractStereoFeaturesDisp(channel, channel_disp);
+  }
 
   ChannelFeatures features;
   features.name = channel.name;
diff --git a/main/src/vtr_vision/src/features/extractor/learned_feature_extractor.cpp b/main/src/vtr_vision/src/features/extractor/learned_feature_extractor.cpp
index 1d76d8f8..6c43963f 100644
--- a/main/src/vtr_vision/src/features/extractor/learned_feature_extractor.cpp
+++ b/main/src/vtr_vision/src/features/extractor/learned_feature_extractor.cpp
@@ -42,14 +42,13 @@ torch::Tensor getKeypointDisparities(torch::Tensor disparity,
   namespace F = torch::nn::functional;
 
   auto options = F::GridSampleFuncOptions().mode(
-      torch::kBilinear).padding_mode(torch::kBorder).align_corners(false);
+      torch::kNearest).padding_mode(torch::kBorder).align_corners(false);
 
-  CLOG(INFO, "stereo.learned_features") << "disparity:" << disparity.sizes();
-  CLOG(INFO, "stereo.learned_features") << "kp_norm:" << keypoints_norm.sizes();
-
+  auto output = F::grid_sample(disparity.contiguous(), keypoints_norm.contiguous(), options).reshape({-1});
 
-  return F::grid_sample(disparity, keypoints_norm, options).reshape({-1});
+  return output;
+  // return F::grid_sample(disparity, keypoints_norm, options).reshape({-1});
 
 }
@@ -232,23 +231,20 @@ torch::Tensor LFE::getDisparity(const cv::Mat& left, const cv::Mat& right,
 
 ////////////////////////////////////////////////////////////////////////////////
 torch::Tensor LFE::getDisparityTensor(const cv::Mat& disp) {
-  float disparity_multiplier = 1.0f;
-  if (disp.type() == CV_16S) {
-    disparity_multiplier = 16.0f;
-  }
-  cv::Mat floatDisp;
-  disp.convertTo(floatDisp, CV_32F, 1.0f / disparity_multiplier);
+  // CLOG(DEBUG, "stereo.learned_features") << "disp_type " << disp.type();
+  // float disparity_multiplier = 16.0f;
+
+  // cv::Mat floatDisp;
+  // disp.convertTo(floatDisp, CV_32F, 1.0f / disparity_multiplier);
 
   //Crop the image
-  cv::Mat disp_cropped;
-  floatDisp.copyTo(disp_cropped); //floatDisp(cv::Rect(48, 0, 464, 384)).copyTo(disp_cropped);
 
   // Convert the cv image to a tensor
-  torch::Tensor disp_tensor = torch::from_blob(disp_cropped.data,
-                                               {disp_cropped.rows,
-                                                disp_cropped.cols, 1},
-                                               torch::kFloat);
+  torch::Tensor disp_tensor = torch::from_blob(disp.data,
+                                               {disp.rows,
+                                                disp.cols, 1},
+                                               torch::kInt16).toType(torch::kFloat) / 16.0f;
 
   // torch::Tensor disp_tensor = torch::from_blob(floatDisp.data,
   //                                              {floatDisp.rows,
@@ -258,7 +254,7 @@ torch::Tensor LFE::getDisparityTensor(const cv::Mat& disp) {
   disp_tensor = disp_tensor.permute({(2), (0), (1)});
   disp_tensor.unsqueeze_(0);
 
-  return disp_tensor;
+  return disp_tensor.contiguous();
 }
 
@@ -337,7 +333,8 @@ std::tuple
   // we're about to use the gpu, lock
   std::unique_lock lock(gpu_mutex_);
-
+  torch::NoGradGuard no_grad;
+
   // Convert the cv image to a tensor
   torch::Tensor image_tensor = torch::from_blob(image_cropped.data,
                                                 {image_cropped.rows,
@@ -545,7 +542,7 @@ ChannelFeatures LFE::learnedFeaturesToStereoKeypoints(
   auto point_desc_tensor_ptr = point_desc_valid.contiguous().data_ptr();
 
   left_feat.descriptors = cv::Mat(num_valid, descriptor_size, CV_32F,
-                                  point_desc_tensor_ptr);
+                                  point_desc_tensor_ptr).clone();
 
   return channel;
 }
@@ -639,6 +636,10 @@ ChannelFeatures LFE::extractStereoFeaturesDisp(const cv::Mat &left_img,
 
   // Get disparity for each keypoint
   torch::Tensor disparity = getDisparityTensor(disp);
+
+  // // torch::Tensor disparity_testing = torch::zeros({1,1,377,512});
+  // torch::Tensor disparity_testing = disparity.clone();
+
   torch::Tensor point_disparities = getKeypointDisparities(disparity, keypoints);
 
   // return channel;
diff --git a/main/src/vtr_vision/src/modules/preprocessing/image_triangulation_module.cpp b/main/src/vtr_vision/src/modules/preprocessing/image_triangulation_module.cpp
index fd33efc7..27847815 100644
--- a/main/src/vtr_vision/src/modules/preprocessing/image_triangulation_module.cpp
+++ b/main/src/vtr_vision/src/modules/preprocessing/image_triangulation_module.cpp
@@ -110,7 +110,7 @@ void ImageTriangulationModule::run_(tactic::QueryCache &qdata0, tactic::OutputCa
       auto num_keypoints = channel.cameras[0].keypoints.size();
 
       // copy the descriptor info from the feature.
-      landmarks.appearance.descriptors = channel.cameras[0].descriptors.clone();
+      landmarks.appearance.descriptors = channel.cameras[0].descriptors;
       landmarks.appearance.feat_infos = channel.cameras[0].feat_infos;
       landmarks.appearance.feat_type = channel.cameras[0].feat_type;
       landmarks.appearance.name = channel.cameras[0].name;
diff --git a/models/learned_visual_features.pt b/models/learned_visual_features.pt
new file mode 100644
index 00000000..43ef4a93
--- /dev/null
+++ b/models/learned_visual_features.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:00d052fd7741eb2686b9614e8db4db01e2e21f138ec3851027882a36d2a0a52e
+size 165517610