|
12 | 12 |
|
13 | 13 | #include <cuda_runtime.h> |
14 | 14 |
|
| 15 | +#include <algorithm> |
15 | 16 | #include <unistd.h> |
16 | 17 | #include <vector> |
17 | 18 |
|
@@ -581,6 +582,72 @@ void TimeFrameGPU<NLayers>::createTrackITSExtDevice(const size_t nSeeds) |
581 | 582 | GPUChkErrS(cudaMemset(mTrackITSExtDevice, 0, mNTracks * sizeof(o2::its::TrackITSExt))); |
582 | 583 | } |
583 | 584 |
|
// Snapshot the host tracks into mTrackExtensionStartTracks and copy that
// snapshot into a newly allocated device buffer.
//
// The device pointer is reset to nullptr up front; when there are no tracks
// the function returns with it still null and performs no device allocation.
template <int NLayers>
void TimeFrameGPU<NLayers>::loadTrackExtensionStartTracksDevice()
{
  GPUTimer timer("loading track extension start tracks");
  GPULog("gpu-transfer: loading {} track extension start tracks, for {:.2f} MB.", this->mTracks.size(), this->mTracks.size() * sizeof(o2::its::TrackITSExt) / constants::MB);

  mTrackExtensionStartTracksDevice = nullptr;

  // Host-side copy of the current tracks, built with the resource obtained from getMemoryPool().
  mTrackExtensionStartTracks = bounded_vector<TrackITSExt>(this->mTracks.begin(), this->mTracks.end(), this->getMemoryPool().get());

  // The snapshot has exactly as many elements as mTracks, so checking it is equivalent.
  if (!mTrackExtensionStartTracks.empty()) {
    const auto memoryType = (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK);
    const size_t nBytes = mTrackExtensionStartTracks.size() * sizeof(o2::its::TrackITSExt);

    allocMem(reinterpret_cast<void**>(&mTrackExtensionStartTracksDevice), nBytes, this->hasFrameworkAllocator(), memoryType);
    // Blocking host-to-device copy of the whole snapshot.
    GPUChkErrS(cudaMemcpy(mTrackExtensionStartTracksDevice, mTrackExtensionStartTracks.data(), nBytes, cudaMemcpyHostToDevice));
  }
}
| 598 | + |
// Reserve device storage for track extension candidates and their per-track
// offsets.
//
// nTracks: number of tracks; MaxTrackExtensionCandidatesPerTrack candidate
//          slots are reserved for each, plus (nTracks + 1) int offsets.
//
// Both device pointers are reset to nullptr first and stay null when the
// computed number of candidates is zero.
template <int NLayers>
void TimeFrameGPU<NLayers>::createTrackExtensionCandidatesDevice(const size_t nTracks)
{
  GPUTimer timer("reserving track extension candidates");
  const size_t nCandidates = nTracks * MaxTrackExtensionCandidatesPerTrack;
  GPULog("gpu-allocation: reserving {} track extension candidates, for {:.2f} MB.", nCandidates, nCandidates * sizeof(o2::its::TrackExtensionCandidate<NLayers>) / constants::MB);

  mTrackExtensionCandidatesDevice = nullptr;
  mTrackExtensionCandidateOffsetsDevice = nullptr;

  if (nCandidates != 0) {
    const auto memoryType = (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK);
    const size_t candidateBytes = nCandidates * sizeof(o2::its::TrackExtensionCandidate<NLayers>);
    // One extra offset slot beyond the per-track entries.
    const size_t offsetBytes = (nTracks + 1) * sizeof(int);

    allocMem(reinterpret_cast<void**>(&mTrackExtensionCandidatesDevice), candidateBytes, this->hasFrameworkAllocator(), memoryType);
    allocMem(reinterpret_cast<void**>(&mTrackExtensionCandidateOffsetsDevice), offsetBytes, this->hasFrameworkAllocator(), memoryType);
  }
}
| 613 | + |
// Reserve the two device scratch buffers (active and next) that hold track
// extension hypotheses.
//
// nThreads:  requested number of threads; values below 1 are treated as 1.
// beamWidth: requested hypotheses per thread; values below 1 are treated as 1.
//
// Each buffer holds max(1, nThreads) * max(1, beamWidth) hypotheses. Because
// both factors are clamped to at least 1, the hypothesis count can never be
// zero, so both buffers are always allocated (the former zero-size early
// return was unreachable and has been dropped).
template <int NLayers>
void TimeFrameGPU<NLayers>::createTrackExtensionScratchDevice(const int nThreads, const int beamWidth)
{
  GPUTimer timer("reserving track extension scratch");

  // Widen each factor before multiplying so the product is computed in size_t.
  const size_t nHypotheses = static_cast<size_t>(std::max(1, nThreads)) * static_cast<size_t>(std::max(1, beamWidth));
  const size_t bufferBytes = nHypotheses * sizeof(o2::its::TrackExtensionHypothesis<NLayers>);
  GPULog("gpu-allocation: reserving {} track extension hypotheses per scratch buffer, for {:.2f} MB each.", nHypotheses, nHypotheses * sizeof(o2::its::TrackExtensionHypothesis<NLayers>) / constants::MB);

  // Reset both pointers before requesting fresh memory.
  mActiveTrackExtensionHypothesesDevice = nullptr;
  mNextTrackExtensionHypothesesDevice = nullptr;

  const auto memoryType = (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK);
  allocMem(reinterpret_cast<void**>(&mActiveTrackExtensionHypothesesDevice), bufferBytes, this->hasFrameworkAllocator(), memoryType);
  allocMem(reinterpret_cast<void**>(&mNextTrackExtensionHypothesesDevice), bufferBytes, this->hasFrameworkAllocator(), memoryType);
}
| 628 | + |
// Size the host and device buffers for fitted track extension results.
//
// nTracks: number of tracks; the int stored at index nTracks of the device
//          candidate-offsets array is read back and used as the result count.
//
// When nTracks is zero or the offsets array is not allocated, the result
// count is zero, the host vector is emptied and the device pointer is null.
// Otherwise the host vector is resized to the read-back count and a device
// buffer of matching size is allocated if that count is non-zero.
template <int NLayers>
void TimeFrameGPU<NLayers>::createTrackExtensionResultsDevice(const size_t nTracks)
{
  GPUTimer timer("reserving fitted track extension results");
  mNTrackExtensionResults = 0;

  const bool offsetsAvailable = (nTracks != 0) && (mTrackExtensionCandidateOffsetsDevice != nullptr);
  if (!offsetsAvailable) {
    mTrackExtensionResults = bounded_vector<TrackExtensionResult<NLayers>>(0, {}, this->getMemoryPool().get());
    mTrackExtensionResultsDevice = nullptr;
    return;
  }

  // Blocking device-to-host read of the single int at offsets[nTracks].
  int totalResults{0};
  GPUChkErrS(cudaMemcpy(&totalResults, &mTrackExtensionCandidateOffsetsDevice[nTracks], sizeof(int), cudaMemcpyDeviceToHost));
  mNTrackExtensionResults = totalResults;
  GPULog("gpu-allocation: reserving {} fitted track extension results, for {:.2f} MB.", mNTrackExtensionResults, mNTrackExtensionResults * sizeof(o2::its::TrackExtensionResult<NLayers>) / constants::MB);

  // Value-initialised host storage, one slot per result.
  mTrackExtensionResults = bounded_vector<TrackExtensionResult<NLayers>>(mNTrackExtensionResults, {}, this->getMemoryPool().get());
  mTrackExtensionResultsDevice = nullptr;

  if (!mTrackExtensionResults.empty()) {
    const auto memoryType = (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK);
    allocMem(reinterpret_cast<void**>(&mTrackExtensionResultsDevice), mNTrackExtensionResults * sizeof(o2::its::TrackExtensionResult<NLayers>), this->hasFrameworkAllocator(), memoryType);
  }
}
| 650 | + |
584 | 651 | template <int NLayers> |
585 | 652 | void TimeFrameGPU<NLayers>::downloadCellsDevice() |
586 | 653 | { |
@@ -627,6 +694,17 @@ void TimeFrameGPU<NLayers>::downloadTrackITSExtDevice() |
627 | 694 | GPUChkErrS(cudaMemcpy(mTrackITSExt.data(), mTrackITSExtDevice, mTrackITSExt.size() * sizeof(o2::its::TrackITSExt), cudaMemcpyDeviceToHost)); |
628 | 695 | } |
629 | 696 |
|
// Copy the fitted track extension results from the device buffer into the
// host vector mTrackExtensionResults.
//
// The number of elements transferred is the current size of the host vector;
// nothing is copied when that vector is empty.
template <int NLayers>
void TimeFrameGPU<NLayers>::downloadTrackExtensionResultsDevice()
{
  GPUTimer timer("downloading fitted track extension results");
  GPULog("gpu-transfer: downloading {} fitted track extension results, for {:.2f} MB.", mTrackExtensionResults.size(), mTrackExtensionResults.size() * sizeof(o2::its::TrackExtensionResult<NLayers>) / constants::MB);

  if (!mTrackExtensionResults.empty()) {
    const size_t nBytes = mTrackExtensionResults.size() * sizeof(o2::its::TrackExtensionResult<NLayers>);
    // Blocking device-to-host copy of every result slot.
    GPUChkErrS(cudaMemcpy(mTrackExtensionResults.data(), mTrackExtensionResultsDevice, nBytes, cudaMemcpyDeviceToHost));
  }
}
| 707 | + |
630 | 708 | template <int NLayers> |
631 | 709 | void TimeFrameGPU<NLayers>::unregisterHostMemory(const int maxLayers) |
632 | 710 | { |
|
0 commit comments