Skip to content

Commit 4568c80

Browse files
committed
ITS: re-enable the possibility of extending tracks
1 parent 1a24064 commit 4568c80

18 files changed

Lines changed: 1646 additions & 6 deletions

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "ITStracking/BoundedAllocator.h"
2020
#include "ITStracking/TimeFrame.h"
2121
#include "ITStracking/Configuration.h"
22+
#include "ITStracking/TrackExtensionCandidate.h"
2223
#include "ITStrackingGPU/Utils.h"
2324

2425
namespace o2::its::gpu
@@ -90,8 +91,13 @@ class TimeFrameGPU : public TimeFrame<NLayers>
9091
void createNeighboursDevice(const unsigned int layer);
9192
void createNeighboursLUTDevice(const int, const unsigned int);
9293
void createTrackITSExtDevice(const size_t);
94+
void loadTrackExtensionStartTracksDevice();
95+
void createTrackExtensionCandidatesDevice(const size_t);
96+
void createTrackExtensionScratchDevice(const int nThreads, const int beamWidth);
97+
void createTrackExtensionResultsDevice(const size_t);
9398
void downloadTrackITSExtDevice();
9499
void downloadCellsNeighboursDevice(std::vector<bounded_vector<CellNeighbour>>&, const int);
100+
void downloadTrackExtensionResultsDevice();
95101
void downloadNeighboursLUTDevice(bounded_vector<int>&, const int);
96102
void downloadCellsDevice();
97103
void downloadCellsLUTDevice();
@@ -118,13 +124,20 @@ class TimeFrameGPU : public TimeFrame<NLayers>
118124
const auto getDeviceTrackingTopologyView() const { return mDeviceTrackingTopologyView; }
119125
int* getDeviceROFramesClusters(const int layer) { return mROFramesClustersDevice[layer]; }
120126
auto& getTrackITSExt() { return mTrackITSExt; }
127+
auto& getTrackExtensionResults() { return mTrackExtensionResults; }
121128
Vertex* getDeviceVertices() { return mPrimaryVerticesDevice; }
122129
int* getDeviceROFramesPV() { return mROFramesPVDevice; }
123130
unsigned char* getDeviceUsedClusters(const int);
124131
const o2::base::Propagator* getChainPropagator();
125132

126133
// Hybrid
127134
TrackITSExt* getDeviceTrackITSExt() { return mTrackITSExtDevice; }
135+
TrackITSExt* getDeviceTrackExtensionStartTracks() { return mTrackExtensionStartTracksDevice; }
136+
TrackExtensionCandidate<NLayers>* getDeviceTrackExtensionCandidates() { return mTrackExtensionCandidatesDevice; }
137+
int* getDeviceTrackExtensionCandidateOffsets() { return mTrackExtensionCandidateOffsetsDevice; }
138+
TrackExtensionHypothesis<NLayers>* getDeviceActiveTrackExtensionHypotheses() { return mActiveTrackExtensionHypothesesDevice; }
139+
TrackExtensionHypothesis<NLayers>* getDeviceNextTrackExtensionHypotheses() { return mNextTrackExtensionHypothesesDevice; }
140+
TrackExtensionResult<NLayers>* getDeviceTrackExtensionResults() { return mTrackExtensionResultsDevice; }
128141
int* getDeviceNeighboursLUT(const int layer) { return mNeighboursLUTDevice[layer]; }
129142
gsl::span<int*> getDeviceNeighboursLUTs() { return mNeighboursLUTDevice; }
130143
CellNeighbour** getDeviceArrayNeighbours() { return mNeighboursDeviceArray; }
@@ -222,6 +235,13 @@ class TimeFrameGPU : public TimeFrame<NLayers>
222235
float** mCellSeedsChi2DeviceArray;
223236

224237
TrackITSExt* mTrackITSExtDevice;
238+
TrackITSExt* mTrackExtensionStartTracksDevice{nullptr};
239+
TrackExtensionCandidate<NLayers>* mTrackExtensionCandidatesDevice{nullptr};
240+
int* mTrackExtensionCandidateOffsetsDevice{nullptr};
241+
TrackExtensionHypothesis<NLayers>* mActiveTrackExtensionHypothesesDevice{nullptr};
242+
TrackExtensionHypothesis<NLayers>* mNextTrackExtensionHypothesesDevice{nullptr};
243+
TrackExtensionResult<NLayers>* mTrackExtensionResultsDevice{nullptr};
244+
unsigned int mNTrackExtensionResults{0};
225245
std::array<CellNeighbour*, MaxCells> mNeighboursDevice{};
226246
CellNeighbour** mNeighboursDeviceArray{nullptr};
227247
std::array<TrackingFrameInfo*, NLayers> mTrackingFrameInfoDevice;
@@ -238,6 +258,9 @@ class TimeFrameGPU : public TimeFrame<NLayers>
238258

239259
// Temporary buffer for storing output tracks from GPU tracking
240260
bounded_vector<TrackITSExt> mTrackITSExt;
261+
bounded_vector<TrackITSExt> mTrackExtensionStartTracks;
262+
// Temporary buffer for fitted track extension proposals from GPU tracking
263+
bounded_vector<TrackExtensionResult<NLayers>> mTrackExtensionResults;
241264
};
242265

243266
template <int NLayers>

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackerTraitsGPU.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ class TrackerTraitsGPU final : public TrackerTraits<NLayers>
3535
void computeLayerCells(const int iteration) final;
3636
void findCellsNeighbours(const int iteration) final;
3737
void findRoads(const int iteration) final;
38+
void extendTracks(const int iteration) final;
3839

3940
void setBz(float) final;
4041

@@ -47,6 +48,11 @@ class TrackerTraitsGPU final : public TrackerTraits<NLayers>
4748
int getTFNumberOfCells() const override;
4849

4950
private:
51+
bool hasTrackFollower(const int iteration) const;
52+
53+
void buildTrackExtensionCandidates(const int iteration, typename TrackerTraits<NLayers>::TrackExtensionCandidates& candidatesPerTrack) final;
54+
bool materializeTrackExtensionCandidate(TrackITSExt& track, const typename TrackerTraits<NLayers>::TrackExtensionCandidateN& candidate, const int iteration) final;
55+
5056
IndexTableUtilsN* mDeviceIndexTableUtils;
5157
gpu::TimeFrameGPU<NLayers>* mTimeFrameGPU;
5258
};

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,13 @@
1313
#ifndef ITSTRACKINGGPU_TRACKINGKERNELS_H_
1414
#define ITSTRACKINGGPU_TRACKINGKERNELS_H_
1515

16+
#include <array>
1617
#include <gsl/gsl>
1718

1819
#include "ITStracking/BoundedAllocator.h"
1920
#include "ITStracking/ROFLookupTables.h"
2021
#include "ITStracking/TrackingTopology.h"
22+
#include "ITStracking/TrackExtensionCandidate.h"
2123
#include "ITStrackingGPU/Utils.h"
2224
#include "DetectorsBase/Propagator.h"
2325

@@ -35,6 +37,58 @@ class Cluster;
3537
class TrackITSExt;
3638
class ExternalAllocator;
3739

40+
inline constexpr int kTrackExtensionLaunchBlocks = 60;
41+
inline constexpr int kTrackExtensionLaunchThreadsPerBlock = 256;
42+
inline constexpr int kTrackExtensionLaunchThreads = kTrackExtensionLaunchBlocks * kTrackExtensionLaunchThreadsPerBlock;
43+
44+
template <int NLayers>
45+
void computeTrackExtensionCandidatesHandler(const TrackITSExt* tracks,
46+
const IndexTableUtils<NLayers>* utils,
47+
const typename ROFMaskTable<NLayers>::View& rofMask,
48+
const typename ROFOverlapTable<NLayers>::View& rofOverlaps,
49+
const Cluster** clusters,
50+
const unsigned char** usedClusters,
51+
const int** clustersIndexTables,
52+
const int** ROFClusters,
53+
const TrackingFrameInfo** trackingFrameInfo,
54+
TrackExtensionCandidate<NLayers>* candidates,
55+
int* candidateOffsets,
56+
TrackExtensionHypothesis<NLayers>* activeHypotheses,
57+
TrackExtensionHypothesis<NLayers>* nextHypotheses,
58+
const std::array<float, NLayers> layerRadii,
59+
const std::array<float, NLayers> layerxX0,
60+
const int nTracks,
61+
const int nLayers,
62+
const int phiBins,
63+
const int beamWidth,
64+
const bool extendTop,
65+
const bool extendBot,
66+
const float bz,
67+
const float maxChi2ClusterAttachment,
68+
const float maxChi2NDF,
69+
const float nSigmaCutPhi,
70+
const float nSigmaCutZ,
71+
const o2::base::Propagator* propagator,
72+
const o2::base::PropagatorF::MatCorrType matCorrType,
73+
gpu::Stream& stream);
74+
75+
template <int NLayers>
76+
void computeTrackExtensionResultsHandler(const TrackITSExt* tracks,
77+
const TrackExtensionCandidate<NLayers>* candidates,
78+
const int* candidateOffsets,
79+
TrackExtensionResult<NLayers>* results,
80+
const TrackingFrameInfo** trackingFrameInfo,
81+
const std::array<float, NLayers> layerxX0,
82+
const int nTracks,
83+
const int nLayers,
84+
const float bz,
85+
const float maxChi2ClusterAttachment,
86+
const float maxChi2NDF,
87+
const o2::base::Propagator* propagator,
88+
const o2::base::PropagatorF::MatCorrType matCorrType,
89+
const bool shiftRefToCluster,
90+
gpu::Stream& stream);
91+
3892
template <int NLayers>
3993
void countTrackletsInROFsHandler(const IndexTableUtils<NLayers>* utils,
4094
const typename ROFMaskTable<NLayers>::View& rofMask,

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
#include <cuda_runtime.h>
1414

15+
#include <algorithm>
1516
#include <unistd.h>
1617
#include <vector>
1718

@@ -581,6 +582,72 @@ void TimeFrameGPU<NLayers>::createTrackITSExtDevice(const size_t nSeeds)
581582
GPUChkErrS(cudaMemset(mTrackITSExtDevice, 0, mNTracks * sizeof(o2::its::TrackITSExt)));
582583
}
583584

585+
template <int NLayers>
586+
void TimeFrameGPU<NLayers>::loadTrackExtensionStartTracksDevice()
587+
{
588+
GPUTimer timer("loading track extension start tracks");
589+
GPULog("gpu-transfer: loading {} track extension start tracks, for {:.2f} MB.", this->mTracks.size(), this->mTracks.size() * sizeof(o2::its::TrackITSExt) / constants::MB);
590+
mTrackExtensionStartTracksDevice = nullptr;
591+
mTrackExtensionStartTracks = bounded_vector<TrackITSExt>(this->mTracks.begin(), this->mTracks.end(), this->getMemoryPool().get());
592+
if (this->mTracks.empty()) {
593+
return;
594+
}
595+
allocMem(reinterpret_cast<void**>(&mTrackExtensionStartTracksDevice), mTrackExtensionStartTracks.size() * sizeof(o2::its::TrackITSExt), this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
596+
GPUChkErrS(cudaMemcpy(mTrackExtensionStartTracksDevice, mTrackExtensionStartTracks.data(), mTrackExtensionStartTracks.size() * sizeof(o2::its::TrackITSExt), cudaMemcpyHostToDevice));
597+
}
598+
599+
template <int NLayers>
600+
void TimeFrameGPU<NLayers>::createTrackExtensionCandidatesDevice(const size_t nTracks)
601+
{
602+
GPUTimer timer("reserving track extension candidates");
603+
const size_t nCandidates = nTracks * MaxTrackExtensionCandidatesPerTrack;
604+
GPULog("gpu-allocation: reserving {} track extension candidates, for {:.2f} MB.", nCandidates, nCandidates * sizeof(o2::its::TrackExtensionCandidate<NLayers>) / constants::MB);
605+
mTrackExtensionCandidatesDevice = nullptr;
606+
mTrackExtensionCandidateOffsetsDevice = nullptr;
607+
if (nCandidates == 0) {
608+
return;
609+
}
610+
allocMem(reinterpret_cast<void**>(&mTrackExtensionCandidatesDevice), nCandidates * sizeof(o2::its::TrackExtensionCandidate<NLayers>), this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
611+
allocMem(reinterpret_cast<void**>(&mTrackExtensionCandidateOffsetsDevice), (nTracks + 1) * sizeof(int), this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
612+
}
613+
614+
template <int NLayers>
615+
void TimeFrameGPU<NLayers>::createTrackExtensionScratchDevice(const int nThreads, const int beamWidth)
616+
{
617+
GPUTimer timer("reserving track extension scratch");
618+
const size_t nHypotheses = static_cast<size_t>(std::max(1, nThreads)) * std::max(1, beamWidth);
619+
GPULog("gpu-allocation: reserving {} track extension hypotheses per scratch buffer, for {:.2f} MB each.", nHypotheses, nHypotheses * sizeof(o2::its::TrackExtensionHypothesis<NLayers>) / constants::MB);
620+
mActiveTrackExtensionHypothesesDevice = nullptr;
621+
mNextTrackExtensionHypothesesDevice = nullptr;
622+
if (nHypotheses == 0) {
623+
return;
624+
}
625+
allocMem(reinterpret_cast<void**>(&mActiveTrackExtensionHypothesesDevice), nHypotheses * sizeof(o2::its::TrackExtensionHypothesis<NLayers>), this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
626+
allocMem(reinterpret_cast<void**>(&mNextTrackExtensionHypothesesDevice), nHypotheses * sizeof(o2::its::TrackExtensionHypothesis<NLayers>), this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
627+
}
628+
629+
template <int NLayers>
630+
void TimeFrameGPU<NLayers>::createTrackExtensionResultsDevice(const size_t nTracks)
631+
{
632+
GPUTimer timer("reserving fitted track extension results");
633+
mNTrackExtensionResults = 0;
634+
if (nTracks == 0 || mTrackExtensionCandidateOffsetsDevice == nullptr) {
635+
mTrackExtensionResults = bounded_vector<TrackExtensionResult<NLayers>>(0, {}, this->getMemoryPool().get());
636+
mTrackExtensionResultsDevice = nullptr;
637+
return;
638+
}
639+
int nResults{0};
640+
GPUChkErrS(cudaMemcpy(&nResults, mTrackExtensionCandidateOffsetsDevice + nTracks, sizeof(int), cudaMemcpyDeviceToHost));
641+
mNTrackExtensionResults = nResults;
642+
GPULog("gpu-allocation: reserving {} fitted track extension results, for {:.2f} MB.", mNTrackExtensionResults, mNTrackExtensionResults * sizeof(o2::its::TrackExtensionResult<NLayers>) / constants::MB);
643+
mTrackExtensionResults = bounded_vector<TrackExtensionResult<NLayers>>(mNTrackExtensionResults, {}, this->getMemoryPool().get());
644+
mTrackExtensionResultsDevice = nullptr;
645+
if (mTrackExtensionResults.empty()) {
646+
return;
647+
}
648+
allocMem(reinterpret_cast<void**>(&mTrackExtensionResultsDevice), mNTrackExtensionResults * sizeof(o2::its::TrackExtensionResult<NLayers>), this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
649+
}
650+
584651
template <int NLayers>
585652
void TimeFrameGPU<NLayers>::downloadCellsDevice()
586653
{
@@ -627,6 +694,17 @@ void TimeFrameGPU<NLayers>::downloadTrackITSExtDevice()
627694
GPUChkErrS(cudaMemcpy(mTrackITSExt.data(), mTrackITSExtDevice, mTrackITSExt.size() * sizeof(o2::its::TrackITSExt), cudaMemcpyDeviceToHost));
628695
}
629696

697+
template <int NLayers>
698+
void TimeFrameGPU<NLayers>::downloadTrackExtensionResultsDevice()
699+
{
700+
GPUTimer timer("downloading fitted track extension results");
701+
GPULog("gpu-transfer: downloading {} fitted track extension results, for {:.2f} MB.", mTrackExtensionResults.size(), mTrackExtensionResults.size() * sizeof(o2::its::TrackExtensionResult<NLayers>) / constants::MB);
702+
if (mTrackExtensionResults.empty()) {
703+
return;
704+
}
705+
GPUChkErrS(cudaMemcpy(mTrackExtensionResults.data(), mTrackExtensionResultsDevice, mTrackExtensionResults.size() * sizeof(o2::its::TrackExtensionResult<NLayers>), cudaMemcpyDeviceToHost));
706+
}
707+
630708
template <int NLayers>
631709
void TimeFrameGPU<NLayers>::unregisterHostMemory(const int maxLayers)
632710
{

0 commit comments

Comments
 (0)