Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Audio PR - Augmentation support [ Mel-Filter Bank and Normalize ] #130

Merged
merged 463 commits into from
Jul 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
463 commits
Select commit Hold shift + click to select a range
ca6f311
Merge branch 'develop' of https://github.com/ROCm/rocAL into generic-…
fiona-gladwin Mar 26, 2024
8d34902
Merge branch 'generic-name-change' into swbs/audio/pr1
fiona-gladwin Mar 26, 2024
ffb284d
Name change from sample to data
Mar 26, 2024
ff12843
Merge branch 'generic-name-change' of https://github.com/swetha097/ro…
Mar 26, 2024
e53388f
Change from decoded_data_info to DecodedDataInfo
Mar 26, 2024
5f23def
Revert "Change the dims[0] and dims[1] positioning for Spectrogram"
swetha097 Mar 26, 2024
0774f69
Remove audio_decoder_factory.cpp file
fiona-gladwin Mar 26, 2024
90b9d83
Minor change
fiona-gladwin Mar 26, 2024
531e5fb
Change variable name
fiona-gladwin Mar 26, 2024
8b28c37
Merge branch 'swbs/audio/pr5' into swbs/audio/pr6
SundarRajan28 Mar 26, 2024
031b5c1
Merge PR6 and add formatting changes
SundarRajan28 Mar 26, 2024
05daa20
Change copyright year and remove debug statements
SundarRajan28 Mar 26, 2024
0753163
Add Spectrogram Case in unit tests
swetha097 Mar 26, 2024
03d66d5
Merge branch 'swbs/audio/pr5' of https://github.com/swetha097/rocAL i…
swetha097 Mar 26, 2024
8bd9d59
Add spectrogram case in python unit tests
swetha097 Mar 26, 2024
6c4e381
Merge branch 'swbs/audio/pr5' into swbs/audio/pr6
SundarRajan28 Mar 26, 2024
98ce527
Merge branch 'generic-name-change' into swbs/audio/pr1
fiona-gladwin Mar 26, 2024
7d4c1fd
Update the struct variable name in audio files
fiona-gladwin Mar 26, 2024
a3898a8
Fixing issues with downmix node output
SundarRajan28 Mar 27, 2024
ac545ff
Adding ROI updation in downmix node
SundarRajan28 Mar 27, 2024
8eae103
Adding downmix test case for python unit tests
SundarRajan28 Mar 27, 2024
2accd5d
Adding downmix and to_decibels test case in C++ tests
SundarRajan28 Mar 27, 2024
413352a
Merge branch 'swbs/audio/pr6' into swbs/audio/pr9
SundarRajan28 Mar 27, 2024
a9e6497
Minor changes
fiona-gladwin Mar 27, 2024
85d21e6
Change ROCAL_DATA_PATH to exclude rocal_data
fiona-gladwin Mar 27, 2024
6856e9b
Merge branch 'swbs/audio/pr1' into swbs/audio/pr2
fiona-gladwin Mar 27, 2024
57c8a0d
Update ROCAL_DATA_PATH to exclude rocal_data
fiona-gladwin Mar 27, 2024
3a86507
Use Pascal case for function names in audio decoder
fiona-gladwin Mar 27, 2024
1158b37
Merge branch 'swbs/audio/pr6' into swbs/audio/pr7
SundarRajan28 Mar 29, 2024
c0cf466
Merge branch 'swbs/audio/pr8' into swbs/audio/pr7
SundarRajan28 Mar 29, 2024
828b19e
Adding mel filter bank augmentation support in rocAL
SundarRajan28 Mar 29, 2024
d02502d
Add resample test case and fixed issues
swetha097 Mar 29, 2024
9931271
NWC - NSR + Slice
swetha097 Mar 29, 2024
8c31aff
Merge remote-tracking branch 'swe_fork/swbs/audio/pr8' into swbs/audi…
swetha097 Mar 29, 2024
732d146
Minor changes
swetha097 Mar 29, 2024
d21f760
Minor changes
swetha097 Mar 29, 2024
faf6b89
Adding latest changes to resample and distribution nodes
SundarRajan28 Mar 29, 2024
009ad53
Merge branch 'swbs/audio/pr8' into swbs/audio/pr7
SundarRajan28 Mar 29, 2024
22b7948
Add test suite changes for NSR
swetha097 Mar 29, 2024
1678aed
Fixing ROI handle errors with distribution nodes
SundarRajan28 Mar 29, 2024
df40c07
Resolve issue with Slice Node
swetha097 Apr 1, 2024
a8d7e6e
Merge branch 'swbs/audio/pr9' into swbs/audio/pr7
SundarRajan28 Apr 1, 2024
ef91012
Add audio path for downmix test case
SundarRajan28 Apr 1, 2024
64a2dd9
Merge branch 'swbs/audio/pr6' into swbs/audio/pr8
SundarRajan28 Apr 1, 2024
c9d7cde
Add NSR and Slice in python unit tests
swetha097 Apr 1, 2024
4c3e0a0
Resolving review comments
SundarRajan28 Apr 1, 2024
1e8756f
Add resample and operator overloading test cases in test suite
swetha097 Apr 1, 2024
b0fffe0
Merge branch 'swbs/audio/pr8' of https://github.com/swetha097/rocAL i…
swetha097 Apr 1, 2024
d775e7c
Resolving review comments
SundarRajan28 Apr 1, 2024
1a1e338
Minor change
swetha097 Apr 1, 2024
e12d31c
Resolving review comments
SundarRajan28 Apr 1, 2024
36caf22
Fix review comments
swetha097 Apr 1, 2024
7f46a25
Modify cmake to have SNDFILE in all capital
fiona-gladwin Apr 2, 2024
976c06d
Fix Slice
swetha097 Apr 2, 2024
d950378
Merge remote-tracking branch 'origin/swbs/audio/pr8' into swbs/audio/pr9
swetha097 Apr 2, 2024
28e7c2e
Merge branch 'swbs/audio/pr8' into swbs/audio/pr7
SundarRajan28 Apr 2, 2024
857e995
Merge branch 'swbs/audio/pr9' into swbs/audio/pr7
SundarRajan28 Apr 2, 2024
a058dda
Merge branch 'swbs/audio/pr9' of https://github.com/swetha097/rocAL i…
swetha097 Apr 2, 2024
a02ba18
Add NSR and slice in audio test suite
swetha097 Apr 2, 2024
ba54a11
Fix NSR and Slice output issue
swetha097 Apr 2, 2024
8407af8
Resolving review comments
SundarRajan28 Apr 2, 2024
5f887b0
Adding set_seed in master_graph ctor
SundarRajan28 Apr 2, 2024
6662532
Change Name NonSilentRegion to NonSilentRegionDetection
swetha097 Apr 2, 2024
903d462
fix output mismatch for Tensor add Tensor
swetha097 Apr 2, 2024
4336238
Remove nD tensor layouts
swetha097 Apr 2, 2024
089b082
Merge branch 'swbs/audio/pr8' of https://github.com/swetha097/rocAL i…
swetha097 Apr 2, 2024
70aa700
Minor changes
fiona-gladwin Apr 2, 2024
0693605
Add struct for audio info in AudioReadAndDecode
fiona-gladwin Apr 2, 2024
44e654d
Merge branch 'develop' of https://github.com/ROCm/rocAL into generic-…
fiona-gladwin Apr 2, 2024
8537fb8
Merge branch 'develop' into swbs/audio/pr8
SundarRajan28 Apr 2, 2024
c77140c
Merge branch 'generic-name-change' into swbs/audio/pr1
fiona-gladwin Apr 2, 2024
eabf7aa
Merge branch 'swbs/audio/pr1' into swbs/audio/pr2
fiona-gladwin Apr 2, 2024
f96a92b
Fix merge conflict
fiona-gladwin Apr 2, 2024
46b7e5e
Merge branch 'swbs/audio/pr1' into swbs/audio/pr2
fiona-gladwin Apr 2, 2024
5ea49d6
Clean up - node_slice.cpp
swetha097 Apr 2, 2024
d9a5dfe
Resolve PR comments - 1
swetha097 Apr 2, 2024
861c426
Remove unused variables, add comments, format merge commit
swetha097 Apr 2, 2024
dc8834d
Resolve internal PR comments
swetha097 Apr 2, 2024
e1621cc
Changes in node_slice.cpp
swetha097 Apr 2, 2024
772293c
Change Enum fom UINT TO INT type
swetha097 Apr 2, 2024
fbd88fa
Merge branch 'swbs/audio/pr8' into swbs/audio/pr9
SundarRajan28 Apr 3, 2024
58bedbc
Merge branch 'swbs/audio/pr9' into swbs/audio/pr7
SundarRajan28 Apr 3, 2024
6b9baf6
Minor change
swetha097 Apr 3, 2024
a880c6d
Merge remote-tracking branch 'origin/swbs/audio/pr9' into swbs/audio/pr7
SundarRajan28 Apr 3, 2024
1bd0c92
Fornat
swetha097 Apr 3, 2024
0909c21
Merge remote-tracking branch 'swe_fork/swbs/audio/pr8' into swbs/audi…
swetha097 Apr 3, 2024
757e4f0
Merge branch 'swbs/audio/pr9' into swbs/audio/pr7
SundarRajan28 Apr 3, 2024
f655850
Merge branch 'swbs/audio/pr9' into swbs/audio/pr7
SundarRajan28 Apr 3, 2024
7b96b7a
Fix pybind issue for normalize
SundarRajan28 Apr 3, 2024
133a166
Changes in node_slice.cpp to move the fill_values to create_node from…
swetha097 Apr 3, 2024
e5e9ad2
Add _vx to scalar variables in the nsr
swetha097 Apr 3, 2024
28d602c
Moving normalize to effects augmentations folder
SundarRajan28 Apr 3, 2024
91d0615
Renaming crop_image_info to CropImageInfo
swetha097 Apr 3, 2024
bb4e5a5
Remove - actual_host_buffers - Unused
swetha097 Apr 3, 2024
50829f6
Rename TimingDBG to TimingDbg
swetha097 Apr 3, 2024
7e1f371
Merge branch 'swbs/audio/pr9' into swbs/audio/pr7
SundarRajan28 Apr 3, 2024
d0a456b
Move the instances of DecodedDataInfo to its base class LoaderModule
swetha097 Apr 3, 2024
a80a3a6
Fix a WRN msg in master_graph.cpp
swetha097 Apr 3, 2024
f648feb
Remove a dangling comment
swetha097 Apr 3, 2024
6146bac
Rename _circ_data_info to _circ_buff_data_info
swetha097 Apr 3, 2024
fc96af0
Adding changes to normalize node
SundarRajan28 Apr 3, 2024
47263d9
Add Glob to CMakeLists.txt
fiona-gladwin Apr 4, 2024
8623be3
Rename SndFileDecoder to GenericAudioDecoder
fiona-gladwin Apr 4, 2024
c4af22c
Merge branch 'develop' of https://github.com/ROCm/rocAL into generic-…
fiona-gladwin Apr 4, 2024
5b9be3d
Merge branch 'generic-name-change' of https://github.com/swetha097/ro…
fiona-gladwin Apr 4, 2024
47dea85
Merge branch 'generic-name-change' into swbs/audio/pr1
fiona-gladwin Apr 4, 2024
660071b
Fix build issues
fiona-gladwin Apr 4, 2024
5bd4fa8
Merge branch 'swbs/audio/pr1' into swbs/audio/pr2
fiona-gladwin Apr 4, 2024
4f9ab6b
Minor change
fiona-gladwin Apr 4, 2024
6c430cf
Merge branch 'swbs/audio/pr1' into swbs/audio/pr2
fiona-gladwin Apr 4, 2024
416180e
Update python API README.md for audio unit test
fiona-gladwin Apr 4, 2024
0f0be88
Update audio unit test README
fiona-gladwin Apr 4, 2024
1e788a8
Add debug and changes to normalize node and test case
SundarRajan28 Apr 5, 2024
5f587fa
Minor fixes for MFB
SundarRajan28 Apr 5, 2024
10f25d5
Add Normalize and MFB test cases in C++ tests
swetha097 Apr 5, 2024
e480f98
Merge remote-tracking branch 'swe_fork/swbs/audio/pr7' into swbs/audi…
swetha097 Apr 5, 2024
07b1467
Revert "Merge remote-tracking branch 'swe_fork/swbs/audio/pr7' into s…
swetha097 Apr 5, 2024
c289085
Fix cpp unit test
swetha097 Apr 5, 2024
f1270fc
Merge branch 'swbs/audio/pr9' into swbs/audio/pr7
SundarRajan28 Apr 5, 2024
083128a
Minor Change
swetha097 Apr 5, 2024
e004aba
Fix C++ audio unit test for 2D data
swetha097 Apr 5, 2024
d09cdb5
Minor changes
SundarRajan28 Apr 5, 2024
9d91ba3
Minor fix in verify output
swetha097 Apr 5, 2024
fb226e8
Change in API docs
SundarRajan28 Apr 5, 2024
3114a18
Merge branch 'swbs/audio/pr2' into swbs/audio/pr3
SundarRajan28 Apr 8, 2024
ac6e6e3
Merge branch 'swbs/audio/pr3' into audio_pr4
SundarRajan28 Apr 8, 2024
876c9ad
Merge remote-tracking branch 'swe_fork/audio_pr4' into swbs/audio/pr5
swetha097 Apr 8, 2024
9445e6c
Adding missed param in python unit tests
SundarRajan28 Apr 8, 2024
12b4801
Merge branch 'swbs/audio/pr5' into swbs/audio/pr6
SundarRajan28 Apr 8, 2024
69d2448
Merge branch 'swbs/audio/pr6' into swbs/audio/pr8
SundarRajan28 Apr 8, 2024
39b5f00
Merge branch 'swbs/audio/pr8' into swbs/audio/pr9
SundarRajan28 Apr 8, 2024
006d23f
Merge branch 'swbs/audio/pr9' into swbs/audio/pr7
SundarRajan28 Apr 8, 2024
e496f3a
Revert "Add Glob to CMakeLists.txt"
fiona-gladwin Apr 10, 2024
5df0055
Merge branch 'develop' of https://github.com/ROCm/rocAL into swbs/aud…
fiona-gladwin Apr 10, 2024
7dc7092
Fix include headers for Audio files
fiona-gladwin Apr 10, 2024
19e30cf
Fix copy data 2D
fiona-gladwin Apr 10, 2024
34deb3b
Merge branch 'swbs/audio/pr1' into swbs/audio/pr2
fiona-gladwin Apr 10, 2024
4c02dfb
Minor changes
fiona-gladwin Apr 11, 2024
e3f350f
Pass decoded data info to load routine instead of separate vectors
fiona-gladwin Apr 11, 2024
67cda83
Update CHANGELOG.md
fiona-gladwin Apr 11, 2024
d36df07
Merge branch 'swbs/audio/pr1' into swbs/audio/pr2
fiona-gladwin Apr 11, 2024
42c844d
Update CHANGELOG.md
fiona-gladwin Apr 11, 2024
8b1c59f
Change swap_handle_time variable name in loader
fiona-gladwin Apr 11, 2024
241ce67
Merge remote-tracking branch 'swe_fork/swbs/audio/pr2' into swbs/audi…
swetha097 Apr 11, 2024
07ba1f6
Update the changelog.md
swetha097 Apr 11, 2024
83513fb
Update ChangeLog.md
swetha097 Apr 11, 2024
31959c2
Merge branch 'swbs/audio/pr5' of https://github.com/swetha097/rocAL i…
swetha097 Apr 11, 2024
6fd3621
Update ChangeLog.md
swetha097 Apr 11, 2024
abc63c9
Merge branch 'swbs/audio/pr3' into audio_pr4
SundarRajan28 Apr 11, 2024
bb8908b
Update CHANGELOG.md
SundarRajan28 Apr 11, 2024
91fed39
Formatting changes
fiona-gladwin Apr 11, 2024
ee3606b
Merge branch 'audio_pr4' into swbs/audio/pr5
SundarRajan28 Apr 11, 2024
6a80714
Update doxygen comments
fiona-gladwin Apr 11, 2024
a19086b
Merge branch 'swbs/audio/pr5' into swbs/audio/pr6
SundarRajan28 Apr 11, 2024
689985d
Move file source reader from readers/image to readers folder
fiona-gladwin Apr 11, 2024
db758fd
Merge branch 'swbs/audio/pr1' into swbs/audio/pr2
fiona-gladwin Apr 11, 2024
6bf44de
Merge branch 'swbs/audio/pr6' into swbs/audio/pr8
SundarRajan28 Apr 11, 2024
3d0f485
Merge branch 'swbs/audio/pr8' into swbs/audio/pr9
SundarRajan28 Apr 11, 2024
edf81ad
Merge branch 'swbs/audio/pr9' into swbs/audio/pr7
SundarRajan28 Apr 11, 2024
67190bf
Update README and add doxygen description
fiona-gladwin Apr 11, 2024
ffdcb0a
Update CMakeLists and README for audio test
fiona-gladwin Apr 11, 2024
b2de5f4
Merge branch 'swbs/audio/pr1' into swbs/audio/pr2
fiona-gladwin Apr 11, 2024
d000af0
Update README for audio test
fiona-gladwin Apr 11, 2024
7415447
Minor fix
fiona-gladwin Apr 12, 2024
f6bffef
Merge branch 'develop' of https://github.com/ROCm/rocAL into swbs/aud…
fiona-gladwin Apr 12, 2024
cb034b0
Merge branch 'swbs/audio/pr1' into swbs/audio/pr2
fiona-gladwin Apr 12, 2024
d8031b5
Merge remote-tracking branch 'swe_fork/swbs/audio/pr2' into swbs/audi…
swetha097 Apr 12, 2024
d894aba
Fix merge from PR 2
swetha097 Apr 12, 2024
689c55f
Minor changes shard_count argument name
fiona-gladwin Apr 12, 2024
1079d50
Rename set and get functions of data_info to decoded_data_info
fiona-gladwin Apr 12, 2024
1f63cab
Merge branch 'swbs/audio/pr1' into swbs/audio/pr2
fiona-gladwin Apr 12, 2024
2967b68
Merge branch 'swbs/audio/pr3' into audio_pr4
SundarRajan28 Apr 16, 2024
36a9516
Merge branch 'audio_pr4' into swbs/audio/pr5
SundarRajan28 Apr 17, 2024
fb7a52b
Merge branch 'swbs/audio/pr5' into swbs/audio/pr6
SundarRajan28 Apr 17, 2024
b3823c8
Merge branch 'swbs/audio/pr6' into swbs/audio/pr8
SundarRajan28 Apr 17, 2024
4de03a5
Merge branch 'swbs/audio/pr8' into swbs/audio/pr9
SundarRajan28 Apr 17, 2024
0161204
Merge branch 'swbs/audio/pr9' into swbs/audio/pr7
SundarRajan28 Apr 17, 2024
42d1bb1
Merge remote-tracking branch 'upstream/develop' into swbs/audio/pr2
SundarRajan28 Apr 17, 2024
3375f41
Merge branch 'swbs/audio/pr2' into swbs/audio/pr3
SundarRajan28 Apr 17, 2024
d7c8884
Merge branch 'swbs/audio/pr3' into audio_pr4
SundarRajan28 Apr 17, 2024
513fd78
Merge branch 'audio_pr4' into swbs/audio/pr5
SundarRajan28 Apr 17, 2024
44cefd6
Merge branch 'swbs/audio/pr5' into swbs/audio/pr6
SundarRajan28 Apr 17, 2024
c100e80
Merge branch 'swbs/audio/pr6' into swbs/audio/pr8
SundarRajan28 Apr 17, 2024
9698308
Merge branch 'swbs/audio/pr8' into swbs/audio/pr9
SundarRajan28 Apr 17, 2024
23dad87
Merge branch 'swbs/audio/pr9' into swbs/audio/pr7
SundarRajan28 Apr 17, 2024
d928c48
Merge branch 'develop' of https://github.com/ROCm/rocAL into swbs/aud…
fiona-gladwin Apr 17, 2024
c0d2309
Merge branch 'swbs/audio/pr1' into swbs/audio/pr2
fiona-gladwin Apr 17, 2024
c01325d
Revert empty line removed in CMakeLists.txt
fiona-gladwin Apr 17, 2024
549def5
Removed prefix original for audio vectors
fiona-gladwin Apr 17, 2024
c1d9cc5
Resolve PR comments
swetha097 Apr 18, 2024
7874f09
Add @params to all args in pytorch.py
swetha097 Apr 18, 2024
ef9a21b
Fix build issue
swetha097 Apr 18, 2024
0f48da9
Merge branch 'develop' of https://github.com/ROCm/rocAL into swbs/aud…
fiona-gladwin Apr 22, 2024
37921de
Minor changes in unit test
swetha097 Apr 22, 2024
96ace00
Merge branch 'swbs/audio/pr2' of https://github.com/swetha097/rocAL i…
swetha097 Apr 22, 2024
6602895
Minor changes
swetha097 Apr 22, 2024
aa13a35
Change ROCAL instaces to rocAL in pytorch.py
swetha097 Apr 22, 2024
2873d8c
Merge branch 'swbs/audio/pr2' into swbs/audio/pr3
fiona-gladwin Apr 22, 2024
2dd31f8
Resolve the PR comments
swetha097 Apr 23, 2024
1cd9779
Merge branch 'swbs/audio/pr3' of https://github.com/swetha097/rocAL i…
swetha097 Apr 23, 2024
d1d5241
Minor changes in decoders.py - Modify the comment for shard_size
swetha097 Apr 23, 2024
f4bcbca
Merge branch 'swbs/audio/pr2' of https://github.com/swetha097/rocAL i…
fiona-gladwin Apr 23, 2024
d152dca
Merge branch 'swbs/audio/pr3' of https://github.com/swetha097/rocAL i…
fiona-gladwin Apr 23, 2024
e4c5788
Merge branch 'develop' of https://github.com/ROCm/rocAL into swbs/aud…
fiona-gladwin Apr 23, 2024
fb33f06
Merge branch 'swbs/audio/pr3' into audio_pr4
SundarRajan28 Apr 24, 2024
be416ef
Minor changes
swetha097 Apr 24, 2024
8a7bb3c
Address the PR comments
swetha097 Apr 25, 2024
2021ab9
Address Review comments
swetha097 Apr 25, 2024
0c900a9
Introduce Audio layouts
fiona-gladwin May 9, 2024
e75616c
Add layout changes for spectrogram
fiona-gladwin May 9, 2024
e7ed0d8
Fix the unit tests - c++ & python
swetha097 May 9, 2024
528a87a
Merge branch 'swbs/audio/pr5' of https://github.com/swetha097/rocAL i…
fiona-gladwin May 9, 2024
ab993d0
Minor fix
fiona-gladwin May 10, 2024
9757256
Adding changes for spec layout changes
SundarRajan28 May 15, 2024
6b2a06c
Merge branch 'swbs/audio/pr6' into swbs/audio/pr8
SundarRajan28 May 15, 2024
df70d39
Merge branch 'swbs/audio/pr8' into swbs/audio/pr7
SundarRajan28 May 15, 2024
5505ed8
Adding changes to MFB and normalize nodes
SundarRajan28 May 15, 2024
e685c37
Update node_slice.cpp
swetha097 May 16, 2024
fc26afd
Update node_slice.h
swetha097 May 16, 2024
ce91644
Merge branch 'swbs/audio/pr5_layout' into swbs/audio/pr5
fiona-gladwin May 17, 2024
60133c6
Merge branch 'swbs/audio/pr5' into swbs/audio/pr6
fiona-gladwin May 17, 2024
b2c40eb
Merge branch 'swbs/audio/pr6' into swbs/audio/pr8
fiona-gladwin May 17, 2024
84db544
Merge branch 'swbs/audio/pr8' into swbs/audio/pr9
fiona-gladwin May 17, 2024
affe8f3
Merge branch 'swbs/audio/pr9' into swbs/audio/pr7
fiona-gladwin May 17, 2024
c41f363
Merge remote-tracking branch 'open_source/develop' into swbs/audio/pr3
swetha097 May 17, 2024
70e12cd
Merge branch 'swbs/audio/pr3' into audio_pr4
swetha097 May 18, 2024
b858b69
Merge branch 'audio_pr4' into swbs/audio/pr5
swetha097 May 18, 2024
5e79034
Merge remote-tracking branch 'origin/swbs/audio/pr5' into HEAD
swetha097 May 18, 2024
66be5a2
Merge branch 'temp_swbs/audio/pr6' into swbs/audio/pr6
swetha097 May 19, 2024
91c4fa1
Merge branch 'swbs/audio/pr6' into swbs/audio/pr8
swetha097 May 19, 2024
750b286
Merge branch 'swbs/audio/pr8' into swbs/audio/pr9
swetha097 May 19, 2024
5276ec2
Merge branch 'swbs/audio/pr9' into swbs/audio/pr7
swetha097 May 19, 2024
9b5fab1
Merge remote-tracking branch 'upstream/develop' into swbs/audio/pr7
SundarRajan28 Jun 5, 2024
11b0f96
Merge remote-tracking branch 'upstream/develop' into swbs/audio/pr7
SundarRajan28 Jun 12, 2024
83cecf5
Merge remote-tracking branch 'open_source/develop' into swbs/audio/pr9
swetha097 Jun 12, 2024
6692974
Fix merge conflicts
SundarRajan28 Jun 13, 2024
0cd21de
Merge remote-tracking branch 'upstream/develop' into swbs/audio/pr7
SundarRajan28 Jun 14, 2024
f7e8826
Merge remote-tracking branch 'open_source/develop' into develop
swetha097 Jun 18, 2024
00cdddb
Merge remote-tracking branch 'open_source/develop' into swbs/audio/pr9
swetha097 Jun 18, 2024
0c74d8b
Merge remote-tracking branch 'upstream/develop' into swbs/audio/pr7
SundarRajan28 Jun 20, 2024
20ef6d6
Resolving review comments
SundarRajan28 Jun 21, 2024
7e9b3ce
Merge branch 'develop' into swbs/audio/pr9
swetha097 Jun 21, 2024
7af1c03
Merge remote-tracking branch 'upstream/develop' into swbs/audio/pr9
SundarRajan28 Jun 25, 2024
e2ef16b
Merge branch 'swbs/audio/pr9' into swbs/audio/pr7
SundarRajan28 Jun 25, 2024
c3f5391
Merge remote-tracking branch 'upstream/develop' into swbs/audio/pr7
SundarRajan28 Jun 29, 2024
5326625
Merge branch 'develop' into swbs/audio/pr7
LakshmiKumar23 Jul 2, 2024
c6fe840
Resolving review comments
SundarRajan28 Jul 4, 2024
c8e1791
Merge branch 'develop' into swbs/audio/pr7
LakshmiKumar23 Jul 8, 2024
704badd
Resolving review comments
SundarRajan28 Jul 9, 2024
58d9cdb
Merge branch 'develop' into swbs/audio/pr7
kiritigowda Jul 10, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,10 @@
* Support for Audio augmentation - Resample
* Support for TensorTensorAdd and TensorScalarMultiply operations
* Support for Uniform and Normal distribution nodes
* Support for Audio augmentation - NonSilentRegionDetection

Check failure on line 28 in CHANGELOG.md

View workflow job for this annotation

GitHub Actions / Documentation / Markdown

Trailing spaces

CHANGELOG.md:28:60 MD009/no-trailing-spaces Trailing spaces [Expected: 0 or 2; Actual: 1] https://github.com/DavidAnson/markdownlint/blob/v0.28.2/doc/md009.md
* Support for generic augmentation - Slice

Check failure on line 29 in CHANGELOG.md

View workflow job for this annotation

GitHub Actions / Documentation / Markdown

Trailing spaces

CHANGELOG.md:29:43 MD009/no-trailing-spaces Trailing spaces [Expected: 0 or 2; Actual: 1] https://github.com/DavidAnson/markdownlint/blob/v0.28.2/doc/md009.md
* Support for generic augmentation - Normalize
* Support for Audio augmentation - MelFilterBank

### Optimizations

Expand Down
47 changes: 47 additions & 0 deletions rocAL/include/api/rocal_api_augmentation.h
Original file line number Diff line number Diff line change
Expand Up @@ -1277,4 +1277,51 @@ extern "C" RocalTensor ROCAL_API_CALL rocalSlice(RocalContext context,
RocalOutOfBoundsPolicy policy = RocalOutOfBoundsPolicy::ROCAL_ERROR,
RocalTensorOutputType output_datatype = ROCAL_FP32);

/*! \brief Performs mean-stddev normalization on the input tensor.
* \ingroup group_rocal_augmentations
* \param [in] context Rocal context
* \param [in] input Input Rocal tensor
* \param [in] axes axes list for tensor normalization
* \param [in] mean mean value (specified for each channel) for tensor normalization
* \param [in] std_dev standard deviation value (specified for each channel) for tensor normalization
* \param [in] is_output is the output tensor part of the graph output
* \param [in] scale scale value (a single float, defaults to 1.0) for tensor normalization
* \param [in] shift shift value (a single float, defaults to 0.0) for tensor normalization
* \param [in] output_datatype the data type of the output tensor (defaults to ROCAL_FP32)
* \return RocalTensor
*/
extern "C" RocalTensor ROCAL_API_CALL rocalNormalize(RocalContext context, RocalTensor input,
std::vector<unsigned> &axes,
std::vector<float> &mean,
std::vector<float> &std_dev,
bool is_output,
float scale = 1.0, float shift = 0.0,
RocalTensorOutputType output_datatype = ROCAL_FP32);

/*! \brief Applies mel-filter bank augmentation on the given input tensor
* \ingroup group_rocal_augmentations
* \param [in] p_context Rocal context
* \param [in] p_input Input Rocal tensor
* \param [in] is_output is the output tensor part of the graph output
* \param [in] freq_high maximum frequency
* \param [in] freq_low minimum frequency
* \param [in] mel_formula formula used to convert frequencies from hertz to mel and from mel to hertz
* \param [in] nfilter number of mel filters
* \param [in] normalize boolean that determines whether the filter weights are normalized
* \param [in] sample_rate sampling rate of the audio data
* \param [in] output_datatype the data type of the output tensor (the implementation accepts FP32 only)
* \return RocalTensor
*/

extern "C" RocalTensor ROCAL_API_CALL rocalMelFilterBank(RocalContext p_context,
RocalTensor p_input,
bool is_output,
float freq_high,
float freq_low,
RocalMelScaleFormula mel_formula,
int nfilter,
bool normalize,
float sample_rate,
RocalTensorOutputType output_datatype);

#endif // MIVISIONX_ROCAL_API_AUGMENTATION_H
14 changes: 14 additions & 0 deletions rocAL/include/api/rocal_api_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -424,6 +424,20 @@ enum RocalOutOfBoundsPolicy {
ROCAL_ERROR
};

/*! \brief rocAL MelScale formula enum
* \ingroup group_rocal_types
* Selects the formula used to convert between hertz and mel.
*/
enum RocalMelScaleFormula {
/*! \brief Slaney (value 0)
* Follows Slaney’s MATLAB Auditory Modelling Work behavior
*/
ROCAL_MELSCALE_SLANEY = 0,
/*! \brief HTK (value 1, implicitly the next value after ROCAL_MELSCALE_SLANEY)
* Follows O’Shaughnessy’s book formula, consistent with Hidden Markov Toolkit(HTK), m = 2595 * log10(1 + (f/700))
*/
ROCAL_MELSCALE_HTK
};

/*! \brief Tensor Last Batch Policies
* \ingroup group_rocal_types
*/
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/

#pragma once
#include "pipeline/graph.h"
#include "pipeline/node.h"
#include "rocal_api_types.h"

/*! \brief Graph node that wraps the vxExtRppMelFilterBank OpenVX kernel.
* The parameters are stored by init() and handed to the kernel as scalars
* when create_node() builds the OpenVX node.
*/
class MelFilterBankNode : public Node {
public:
// Forwards the input/output tensor lists to the Node base class.
MelFilterBankNode(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs);
MelFilterBankNode() = delete;
// Stores the mel filter bank parameters; it only assigns members.
void init(float freq_high, float freq_low, RocalMelScaleFormula mel_formula, int nfilter, bool normalize, float sample_rate);

protected:
// Creates the OpenVX node once; a second call is a no-op when _node is already set.
void create_node() override;
// Intentionally empty for this node.
void update_node() override;

private:
float _freq_high = 0;        // maximum frequency passed to the kernel
float _freq_low = 0;         // minimum frequency passed to the kernel
int _mel_formula = 0;        // RocalMelScaleFormula stored as int (0 == ROCAL_MELSCALE_SLANEY)
int _nfilter = 128;          // number of mel filters
float _sample_rate = 44100;  // sampling rate of the audio data
bool _normalize = true;      // whether the kernel is asked to normalize
};
2 changes: 2 additions & 0 deletions rocAL/include/augmentations/augmentations_nodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,5 @@ THE SOFTWARE.
#include "augmentations/arithmetic_augmentations/node_tensor_add_tensor.h"
#include "augmentations/audio_augmentations/node_non_silent_region_detection.h"
#include "augmentations/geometry_augmentations/node_slice.h"
#include "augmentations/effects_augmentations/node_normalize.h"
#include "augmentations/audio_augmentations/node_mel_filter_bank.h"
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/

#pragma once
#include "pipeline/graph.h"
#include "pipeline/node.h"
#include "parameters/parameter_vx.h"

/*! \brief Graph node for the Normalize augmentation.
 * The parameters are supplied through init(); the OpenVX node itself is built in
 * create_node(), whose definition lives in the corresponding source file.
 */
class NormalizeNode : public Node {
   public:
    // Forwards the input/output tensor lists to the Node base class.
    NormalizeNode(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs);
    NormalizeNode() = delete;
    /*! \brief Supplies the normalization parameters.
     * \param [in] axes axes list for tensor normalization
     * \param [in] mean mean values for tensor normalization
     * \param [in] std_dev standard deviation values for tensor normalization
     * \param [in] scale scale value for tensor normalization
     * \param [in] shift shift value for tensor normalization
     */
    void init(std::vector<unsigned> &axes, std::vector<float> &mean, std::vector<float> &std_dev, float scale, float shift);

   protected:
    void create_node() override;
    // Nothing is refreshed between graph runs for this node.
    void update_node() override {}

   private:
    // Every member carries an initializer so the object never holds indeterminate
    // handles or floats before init()/create_node() have run.
    int _axis_mask = 0;  // presumably a bitmask derived from _axes - TODO confirm against the source file
    vx_array _mean_vx_array = nullptr;
    vx_array _stddev_vx_array = nullptr;
    std::vector<unsigned> _axes;
    std::vector<float> _mean, _std_dev;
    float _scale = 1.0f;  // matches the default of rocalNormalize()
    float _shift = 0.0f;  // matches the default of rocalNormalize()
    std::vector<std::vector<uint32_t>> _normalize_roi;
    enum NormalizeModes {
        DO_NOT_COMPUTE = 0,      // Mean and Stddev values are passed from user
        COMPUTE_MEAN = 1,        // Compute mean from specified axes of input
        COMPUTE_STDDEV = 2,      // Compute stddev from specified axes of input
        COMPUTE_MEAN_STDDEV = 3  // Compute both mean and stddev from specified axes of input
    };
};
67 changes: 67 additions & 0 deletions rocAL/source/api/rocal_api_augmentation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2505,3 +2505,70 @@ rocalSlice(
}
return output;
}

/*! \brief Adds a Normalize node to the graph and returns its output tensor.
 * Returns nullptr when the context or input handle is null. Exceptions raised
 * while building the node are captured on the context and logged; the tensor
 * pointer assigned so far (possibly nullptr) is then returned.
 */
RocalTensor ROCAL_API_CALL
rocalNormalize(RocalContext p_context, RocalTensor p_input, std::vector<unsigned>& axes,
               std::vector<float>& mean, std::vector<float>& std_dev, bool is_output,
               float scale, float shift,
               RocalTensorOutputType output_datatype) {
    Tensor* output = nullptr;
    if (!p_context || !p_input) {
        ERR("Invalid ROCAL context or invalid input tensor")
        return output;
    }
    auto ctx = static_cast<Context*>(p_context);
    auto source_tensor = static_cast<Tensor*>(p_input);
    try {
        // The output mirrors the input description except for the requested data type.
        TensorInfo normalized_info = source_tensor->info();
        normalized_info.set_data_type(static_cast<RocalTensorDataType>(output_datatype));
        output = ctx->master_graph->create_tensor(normalized_info, is_output);
        ctx->master_graph->add_node<NormalizeNode>({source_tensor}, {output})
            ->init(axes, mean, std_dev, scale, shift);
    } catch (const std::exception& e) {
        ctx->capture_error(e.what());
        ERR(e.what())
    }
    return output;
}

/*! \brief Adds a MelFilterBank node to the graph and returns its output tensor.
 * The output description is copied from the input, with dims[1] replaced by
 * nfilter and dims[2] replaced by max_shape()[1] of the input.
 * Returns nullptr when the context or input handle is null. Any failure raised
 * while building the node (unsupported data type, non-positive nfilter, input
 * with too few dimensions, ...) is captured on the context and logged.
 */
RocalTensor ROCAL_API_CALL
rocalMelFilterBank(
    RocalContext p_context,
    RocalTensor p_input,
    bool is_output,
    float freq_high,
    float freq_low,
    RocalMelScaleFormula mel_formula,
    int nfilter,
    bool normalize,
    float sample_rate,
    RocalTensorOutputType output_datatype) {
    Tensor* output = nullptr;
    if ((p_context == nullptr) || (p_input == nullptr)) {
        ERR("Invalid ROCAL context or invalid input tensor")
        return output;
    }
    auto context = static_cast<Context*>(p_context);
    auto input = static_cast<Tensor*>(p_input);
    try {
        RocalTensorDataType op_tensor_data_type = static_cast<RocalTensorDataType>(output_datatype);
        if (op_tensor_data_type != RocalTensorDataType::FP32) {
            THROW("Only FP32 dtype is supported for MelFilterBank augmentation.")
        }
        // A non-positive filter count would otherwise be converted to size_t and
        // produce a zero or enormous output dimension.
        if (nfilter <= 0) {
            THROW("MelFilterBank augmentation requires a positive nfilter value.")
        }
        TensorInfo output_info = input->info();
        std::vector<size_t> max_dims = output_info.max_shape();
        std::vector<size_t> dims = output_info.dims();
        // dims[2] and max_dims[1] are accessed below; reject tensors that are too small.
        if ((dims.size() < 3) || (max_dims.size() < 2)) {
            THROW("MelFilterBank augmentation requires an input tensor with at least 3 dimensions.")
        }
        // Keep the frame count as size_t: it is already unsigned, so no clamping
        // against zero or round trip through int is needed.
        const size_t max_frame = max_dims[1];
        dims[1] = static_cast<size_t>(nfilter);
        dims[2] = max_frame;
        output_info.set_dims(dims);
        output_info.set_data_type(op_tensor_data_type);
        output = context->master_graph->create_tensor(output_info, is_output);
        context->master_graph->add_node<MelFilterBankNode>({input}, {output})->init(freq_high, freq_low, mel_formula, nfilter, normalize, sample_rate);
    } catch (const std::exception& e) {
        context->capture_error(e.what());
        ERR(e.what())
    }
    return output;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
/*
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/

#include "augmentations/audio_augmentations/node_mel_filter_bank.h"

#include <vx_ext_rpp.h>

#include "pipeline/exception.h"

// Forwards the input and output tensor lists to the Node base class; no other state is set here.
MelFilterBankNode::MelFilterBankNode(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) : Node(inputs, outputs) {}

void MelFilterBankNode::create_node() {
if (_node)
return;

vx_scalar freq_high_vx = vxCreateScalar(vxGetContext((vx_reference)_graph->get()), VX_TYPE_FLOAT32, &_freq_high);
vx_scalar freq_low_vx = vxCreateScalar(vxGetContext((vx_reference)_graph->get()), VX_TYPE_FLOAT32, &_freq_low);
vx_scalar mel_formula_vx = vxCreateScalar(vxGetContext((vx_reference)_graph->get()), VX_TYPE_INT32, &_mel_formula);
vx_scalar nfilter_vx = vxCreateScalar(vxGetContext((vx_reference)_graph->get()), VX_TYPE_INT32, &_nfilter);
vx_scalar normalize_vx = vxCreateScalar(vxGetContext((vx_reference)_graph->get()), VX_TYPE_BOOL, &_normalize);
vx_scalar sample_rate_vx = vxCreateScalar(vxGetContext((vx_reference)_graph->get()), VX_TYPE_FLOAT32, &_sample_rate);
int input_layout = static_cast<int>(_inputs[0]->info().layout());
int output_layout = static_cast<int>(_outputs[0]->info().layout());
vx_scalar input_layout_vx = vxCreateScalar(vxGetContext((vx_reference)_graph->get()), VX_TYPE_INT32, &input_layout);
vx_scalar output_layout_vx = vxCreateScalar(vxGetContext((vx_reference)_graph->get()), VX_TYPE_INT32, &output_layout);
_node = vxExtRppMelFilterBank(_graph->get(), _inputs[0]->handle(), _inputs[0]->get_roi_tensor(), _outputs[0]->handle(), _outputs[0]->get_roi_tensor(), freq_high_vx,
freq_low_vx, mel_formula_vx, nfilter_vx, normalize_vx, sample_rate_vx, input_layout_vx, output_layout_vx);

vx_status status;
if ((status = vxGetStatus((vx_reference)_node)) != VX_SUCCESS)
THROW("Adding the mel filter bank (vxRppMelFilterBank) node failed: " + TOSTR(status))
}

// Intentionally empty: this node performs no work in update_node().
void MelFilterBankNode::update_node() {}

void MelFilterBankNode::init(float freq_high, float freq_low, RocalMelScaleFormula mel_formula,
int nfilter, bool normalize, float sample_rate) {
_freq_high = freq_high;
_freq_low = freq_low;
_mel_formula = static_cast<int>(mel_formula);
_nfilter = nfilter;
_normalize = normalize;
_sample_rate = sample_rate;
}
Loading
Loading