Skip to content

Commit bfbb7c1

Browse files
committed
Minor changes towards 1.6.1, CI version bumps
1 parent 26ad489 commit bfbb7c1

File tree

8 files changed

+114
-95
lines changed

8 files changed

+114
-95
lines changed

.gitlab-ci-internal.yml

Lines changed: 19 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,9 @@ variables:
88
REBUILD_DOCKER_IMAGE:
99
value: "0"
1010
description: "rebuild and publish docker image with latest emscripten"
11+
EMSDK_VERSION:
12+
value: "3.1.26"
13+
description: "EMSDK version used for WASM build (either version number or latest)"
1114
CCACHE_DIR: "${CI_PROJECT_DIR}/ext/ccache"
1215
CCACHE_MAXSIZE: 500M
1316

@@ -260,25 +263,38 @@ test_macos_x86_64:
260263
- echo -n $CI_REGISTRY_PASSWORD | docker login -u $CI_REGISTRY_USER --password-stdin $CI_REGISTRY
261264
- mkdir -p empty_context
262265
- if [[ $REBUILD_DOCKER_IMAGE = 1 ]] ; then NO_CACHE_ARG=--no-cache=true ; fi
263-
- docker build -t $DOCKER_IMAGE_TAG $NO_CACHE_ARG -f $DOCKERFILE empty_context
266+
- docker pull $DOCKER_IMAGE_TAG || true
267+
- docker build -t $DOCKER_IMAGE_TAG
268+
-f $DOCKERFILE
269+
--build-arg BUILDKIT_INLINE_CACHE=1
270+
--pull
271+
--cache-from $DOCKER_IMAGE_TAG
272+
$NO_CACHE_ARG
273+
$IMAGE_BUILD_ARGS
274+
empty_context
264275
- docker push $DOCKER_IMAGE_TAG
265276
variables:
266277
DOCKERFILE: ""
267278
DOCKER_IMAGE_TAG: ""
279+
IMAGE_BUILD_ARGS: ""
268280
tags:
269281
- docker-build
270282

271283
rebuild_selenium_docker_image:
272284
extends: .rebuild_docker_image_template
273285
variables:
274286
DOCKERFILE: ".selenium.dockerfile"
275-
DOCKER_IMAGE_TAG: "$CI_REGISTRY_IMAGE/selenium-debian:bullseye"
287+
DOCKER_IMAGE_TAG: "$CI_REGISTRY_IMAGE/selenium-debian:emsdk-$EMSDK_VERSION"
288+
IMAGE_BUILD_ARGS: "--build-arg EMSDK_VER=$EMSDK_VERSION"
276289
rules:
277290
- if: '$CI_PROJECT_PATH == "git/vvdec"'
278291

279292
test_wasm:
280293
extends: .build_test_template
281-
image: $CI_REGISTRY/git/vvdec/selenium-debian:bullseye
294+
image: $CI_REGISTRY/git/vvdec/selenium-debian:emsdk-$EMSDK_VERSION
295+
needs:
296+
- job: rebuild_selenium_docker_image
297+
optional: true
282298
script:
283299
- source /opt/emsdk/emsdk_env.sh
284300
- emcc --version

.selenium.dockerfile

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -27,11 +27,13 @@ RUN apt-get update && \
2727
xz-utils
2828
ENV CMAKE_GENERATOR=Ninja
2929

30+
ARG EMSDK_VER=latest
31+
3032
WORKDIR /opt
3133
RUN git clone https://github.com/emscripten-core/emsdk.git
3234
ENV PATH=$PATH:/opt/emsdk
33-
RUN emsdk install latest && \
34-
emsdk activate latest
35+
RUN emsdk install $EMSDK_VER && \
36+
emsdk activate $EMSDK_VER
3537

3638
# install selenium from debian package
3739
RUN apt-get update && apt-get install -y python3-selenium

source/Lib/CommonLib/Buffer.cpp

Lines changed: 19 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -545,23 +545,25 @@ void AreaBuf<Pel>::transposedFrom( const AreaBuf<const Pel> &other )
545545
{
546546
CHECK( width != other.height || height != other.width, "Incompatible size" );
547547

548-
if( ( width & 3 ) != 0 || ( height & 3 ) != 0 )
548+
if( ( ( width | height ) & 7 ) == 0 )
549549
{
550-
Pel* dst = buf;
551550
const Pel* src = other.buf;
552-
width = other.height;
553-
height = other.width;
554-
stride = stride < width ? width : stride;
555551

556-
for( unsigned y = 0; y < other.height; y++ )
552+
for( unsigned y = 0; y < other.height; y += 8 )
557553
{
558-
for( unsigned x = 0; x < other.width; x++ )
554+
Pel* dst = buf + y;
555+
556+
for( unsigned x = 0; x < other.width; x += 8 )
559557
{
560-
dst[y + x*stride] = src[x + y * other.stride];
558+
g_pelBufOP.transpose8x8( &src[x], other.stride, dst, stride );
559+
560+
dst += 8 * stride;
561561
}
562+
563+
src += 8 * other.stride;
562564
}
563565
}
564-
else if( ( width & 7 ) != 0 || ( height & 7 ) != 0 )
566+
else if( ( ( width | height ) & 3 ) == 0 )
565567
{
566568
const Pel* src = other.buf;
567569

@@ -581,20 +583,18 @@ void AreaBuf<Pel>::transposedFrom( const AreaBuf<const Pel> &other )
581583
}
582584
else
583585
{
586+
Pel* dst = buf;
584587
const Pel* src = other.buf;
588+
width = other.height;
589+
height = other.width;
590+
stride = stride < width ? width : stride;
585591

586-
for( unsigned y = 0; y < other.height; y += 8 )
592+
for( unsigned y = 0; y < other.height; y++ )
587593
{
588-
Pel* dst = buf + y;
589-
590-
for( unsigned x = 0; x < other.width; x += 8 )
594+
for( unsigned x = 0; x < other.width; x++ )
591595
{
592-
g_pelBufOP.transpose8x8( &src[x], other.stride, dst, stride );
593-
594-
dst += 8 * stride;
596+
dst[y + x*stride] = src[x + y * other.stride];
595597
}
596-
597-
src += 8 * other.stride;
598598
}
599599
}
600600
}
@@ -695,6 +695,7 @@ void PelStorage::create( const ChromaFormat _chromaFormat, const Size& _size, co
695695
if( userAlloc && userAlloc->enabled )
696696
{
697697
m_origin[i] = ( Pel* ) userAlloc->create( userAlloc->opaque, (vvdecComponentType)i, sizeof(Pel)*area, MEMORY_ALIGN_DEF_SIZE, &m_allocator[i] );
698+
CHECK( m_origin[i] == nullptr, "external allocator callback failed (returned NULL)." );
698699
m_externAllocator = true;
699700
m_userAlloc = userAlloc;
700701
}

source/Lib/CommonLib/ContextModelling.cpp

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -54,7 +54,7 @@ namespace vvdec
5454

5555
static const int prefix_ctx[8] = { 0, 0, 0, 3, 6, 10, 15, 21 };
5656

57-
CoeffCodingContext::CoeffCodingContext( const TransformUnit& tu, ComponentID component, bool signHide )
57+
CoeffCodingContext::CoeffCodingContext( const TransformUnit& tu, ComponentID component, bool signHide, CtxTpl* tplBuf )
5858
: m_chType (toChannelType(component))
5959
, m_width (tu.block(component).width)
6060
, m_height (tu.block(component).height)
@@ -102,7 +102,10 @@ CoeffCodingContext::CoeffCodingContext( const TransformUnit& tu, ComponentID com
102102
, m_bdpcm (isLuma(component) ? tu.cu->bdpcmMode() : tu.cu->bdpcmModeChroma())
103103
, m_regBinLimit ( ( TU::getTbAreaAfterCoefZeroOut( tu, component ) * ( isLuma( component ) ? MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_LUMA : MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_CHROMA ) ) >> 4 )
104104
, m_ts (tu.mtsIdx( component ) == MTS_SKIP)
105+
, m_tplBuf (tplBuf)
105106
{
107+
if( !m_ts || tu.cu->slice->getTSResidualCodingDisabledFlag() )
108+
memset( tplBuf, 0, m_width * m_height * sizeof( CtxTpl ) );
106109
}
107110

108111
void CoeffCodingContext::initSubblock( int SubsetId, bool sigGroupFlag )
@@ -115,7 +118,6 @@ void CoeffCodingContext::initSubblock( int SubsetId, bool sigGroupFlag )
115118
m_maxSubPos = m_minSubPos + ( 1 << m_log2CGSize ) - 1;
116119
const bool lastHorGrp = m_subSetPosX == m_widthInGroups - 1;
117120
const bool lastVerGrp = m_subSetPosY == m_heightInGroups - 1;
118-
m_checkTplBnd = lastHorGrp;
119121
if( sigGroupFlag )
120122
{
121123
m_sigCoeffGroupFlag.set ( m_subSetPos );

source/Lib/CommonLib/ContextModelling.h

Lines changed: 44 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -59,10 +59,16 @@ POSSIBILITY OF SUCH DAMAGE.
5959
namespace vvdec
6060
{
6161

62+
struct CtxTpl
63+
{
64+
// lower 5 bits are absSum1, upper 3 bits are numPos
65+
uint8_t ctxTpl;
66+
};
67+
6268
struct CoeffCodingContext
6369
{
6470
public:
65-
CoeffCodingContext( const TransformUnit& tu, ComponentID component, bool signHide );
71+
CoeffCodingContext( const TransformUnit& tu, ComponentID component, bool signHide, CtxTpl *tplBuf );
6672
public:
6773
void initSubblock ( int SubsetId, bool sigGroupFlag = false );
6874
public:
@@ -112,42 +118,19 @@ struct CoeffCodingContext
112118

113119
void decNumCtxBins (int n) { m_remainingContextBins -= n; }
114120
void incNumCtxBins (int n) { m_remainingContextBins += n; }
115-
bool checkTplBnd () const { return m_checkTplBnd; }
116121

117-
template<bool checkBnd = false>
118-
unsigned sigCtxIdAbs( int blkPos, const TCoeffSig* coeff, const int state )
122+
unsigned sigCtxIdAbs( const int blkPos, const int state )
119123
{
120-
const uint32_t posY = blkPos >> m_log2BlockWidth;
121-
const uint32_t posX = blkPos & ( ( 1 << m_log2BlockWidth ) - 1 );
122-
const TCoeffSig* pData = coeff + posX + ( posY << m_log2BlockWidth );
123-
const int diag = posX + posY;
124-
int numPos = 0;
125-
int sumAbs = 0;
126-
#define UPDATE(x) {int a=x;sumAbs+=std::min(4+(a&1),a);numPos+=!!a;}
127-
if( checkBnd )
128-
{
129-
const int xLtWmin1 = ( int( posX ) + 1 - int( m_width ) ) >> 31;
130-
const int xLtWmin2 = ( int( posX ) + 2 - int( m_width ) ) >> 31;
131-
132-
UPDATE( ( pData[1] & xLtWmin1 ) );
133-
UPDATE( ( pData[2] & xLtWmin2 ) );
134-
UPDATE( pData[m_width] );
135-
UPDATE( ( pData[m_width+1] & xLtWmin1 ) );
136-
UPDATE( pData[m_width<<1] );
137-
}
138-
else
139-
{
140-
UPDATE( pData[1] );
141-
UPDATE( pData[2] );
142-
UPDATE( pData[m_width] );
143-
UPDATE( pData[m_width+1] );
144-
UPDATE( pData[m_width<<1] );
145-
}
146-
#undef UPDATE
124+
const uint32_t posY = blkPos >> m_log2BlockWidth;
125+
const uint32_t posX = blkPos & ( ( 1 << m_log2BlockWidth ) - 1 );
126+
const int diag = posX + posY;
127+
const int tplVal = m_tplBuf[blkPos].ctxTpl;
128+
const int numPos = tplVal >> 5;
129+
const int sumAbs = tplVal & 31;
147130

148131
int ctxOfs = std::min( ( sumAbs + 1 ) >> 1, 3 ) + ( diag < 2 ? 4 : 0 );
149132

150-
if( m_chType == CHANNEL_TYPE_LUMA )
133+
if( isLuma( m_chType ) )
151134
{
152135
ctxOfs += diag < 5 ? 4 : 0;
153136
}
@@ -156,6 +139,29 @@ struct CoeffCodingContext
156139
return m_sigFlagCtxSet[std::max( 0, state-1 )]( ctxOfs );
157140
}
158141

142+
void absVal1stPass( const int blkPos, TCoeffSig* coeff, const TCoeffSig absLevel1 )
143+
{
144+
CHECKD( !absLevel1, "absLevel1 has to non-zero!" );
145+
146+
coeff[blkPos] = absLevel1;
147+
148+
const uint32_t posY = blkPos >> m_log2BlockWidth;
149+
const uint32_t posX = blkPos & ( ( 1 << m_log2BlockWidth ) - 1 );
150+
151+
auto update_deps = [&]( int offset )
152+
{
153+
auto& ctx = m_tplBuf[blkPos - offset];
154+
ctx.ctxTpl += uint8_t( 32 + absLevel1 );
155+
};
156+
157+
if( posY > 1 ) update_deps( 2 * m_width );
158+
if( posY > 0
159+
&& posX > 0 ) update_deps( m_width + 1 );
160+
if( posY > 0 ) update_deps( m_width );
161+
if( posX > 1 ) update_deps( 2 );
162+
if( posX > 0 ) update_deps( 1 );
163+
}
164+
159165
uint8_t ctxOffsetAbs()
160166
{
161167
int offset = 0;
@@ -186,7 +192,7 @@ struct CoeffCodingContext
186192
{
187193
sum += pData[m_width + 1];
188194
}
189-
}
195+
}
190196
else if (posX+1 < m_width)
191197
{
192198
sum += pData[1];
@@ -198,8 +204,8 @@ struct CoeffCodingContext
198204
if (posY+2 < m_height)
199205
{
200206
sum += pData[m_width];
201-
sum += pData[m_width << 1];
202-
}
207+
sum += pData[m_width << 1];
208+
}
203209
else if (posY+1 < m_height)
204210
{
205211
sum += pData[m_width];
@@ -213,7 +219,7 @@ struct CoeffCodingContext
213219
const uint32_t posX = blkPos & ( ( 1 << m_log2BlockWidth ) - 1 );
214220
const TCoeffSig* posC = coeff + posX + posY * m_width;
215221
int numPos = 0;
216-
#define UPDATE(x) {int a=abs(x);numPos+=!!a;}
222+
#define UPDATE(x) {numPos+=!!x;}
217223
if( posX > 0 )
218224
{
219225
UPDATE( posC[-1] );
@@ -237,7 +243,7 @@ struct CoeffCodingContext
237243
const TCoeffSig* posC = coeff + posX + posY * m_width;
238244

239245
int numPos = 0;
240-
#define UPDATE(x) {int a=abs(x);numPos+=!!a;}
246+
#define UPDATE(x) {numPos+=!!x;}
241247

242248
if (bdpcm)
243249
{
@@ -396,7 +402,6 @@ struct CoeffCodingContext
396402
const int m_lastShiftX;
397403
const int m_lastShiftY;
398404
// modified
399-
bool m_checkTplBnd;
400405
int m_scanPosLast;
401406
int m_subSetId;
402407
int m_subSetPos;
@@ -421,6 +426,7 @@ struct CoeffCodingContext
421426
const bool m_bdpcm;
422427
int m_regBinLimit;
423428
const bool m_ts;
429+
CtxTpl* m_tplBuf;
424430
};
425431

426432

source/Lib/CommonLib/x86/IntraPredX86.h

Lines changed: 8 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -215,10 +215,8 @@ void IntraPredAngleCore_SIMD(int16_t* pDstBuf,const ptrdiff_t dstStride,int16_t*
215215
__m256i coeff = _mm256_broadcastsi128_si256(tmp);
216216
for( int x = 0; x < width; x+=16)
217217
{
218-
__m256i src0 = _mm256_lddqu_si256( ( const __m256i * )&refMain[refMainIndex - 1] );//load 16 16 bit reference Pels -1 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
219-
__m256i src2 = _mm256_castsi128_si256 (_mm_lddqu_si128( ( __m128i const * )&refMain[refMainIndex +4 - 1] ));
220-
__m256i src1 = _mm256_permute2f128_si256 (src0,src0,0x00);
221-
src2 = _mm256_permute2f128_si256 (src2,src2,0x00);
218+
__m256i src1 = _mm256_broadcastsi128_si256( _mm_loadu_si128( ( const __m128i* ) &refMain[refMainIndex - 1] ) );
219+
__m256i src2 = _mm256_broadcastsi128_si256( _mm_loadu_si128( ( const __m128i* ) &refMain[refMainIndex + 4 - 1] ) );
222220
src1 = _mm256_shuffle_epi8(src1,shflmask1); // -1 0 1 2 0 1 2 3 1 2 3 4 2 3 4 5
223221
src2 = _mm256_shuffle_epi8(src2,shflmask1); // 3 4 5 6 4 5 6 7 5 6 7 8 6 7 8 9
224222

@@ -232,11 +230,9 @@ void IntraPredAngleCore_SIMD(int16_t* pDstBuf,const ptrdiff_t dstStride,int16_t*
232230
sum = _mm256_srai_epi32( sum, 6 );
233231

234232
refMainIndex+=8;
235-
236-
src1 = _mm256_permute2f128_si256 (src0,src0,0x1);
237-
src2 = _mm256_inserti128_si256(src2, _mm_lddqu_si128( ( __m128i const * )&refMain[refMainIndex +4 - 1] ), 0x0);
238-
src1 = _mm256_permute2f128_si256 (src1,src1,0x00);
239-
src2 = _mm256_permute2f128_si256 (src2,src2,0x00);
233+
234+
src1 = _mm256_broadcastsi128_si256( _mm_loadu_si128( ( __m128i const* ) &refMain[refMainIndex - 1] ) );
235+
src2 = _mm256_broadcastsi128_si256( _mm_loadu_si128( ( __m128i const* ) &refMain[refMainIndex + 4 - 1] ) );
240236

241237
src1 = _mm256_shuffle_epi8(src1,shflmask1); // -1 0 1 2 0 1 2 3 1 2 3 4 2 3 4 5
242238
src2 = _mm256_shuffle_epi8(src2,shflmask1); // 3 4 5 6 4 5 6 7 5 6 7 8 6 7 8 9
@@ -248,6 +244,7 @@ void IntraPredAngleCore_SIMD(int16_t* pDstBuf,const ptrdiff_t dstStride,int16_t*
248244

249245
sum1 = _mm256_add_epi32( sum1, offset );
250246
sum1 = _mm256_srai_epi32( sum1, 6 );
247+
__m256i
251248
src0 = _mm256_packs_epi32( sum, sum1 );
252249

253250
src0 = _mm256_permute4x64_epi64(src0,0xD8);
@@ -282,11 +279,8 @@ void IntraPredAngleCore_SIMD(int16_t* pDstBuf,const ptrdiff_t dstStride,int16_t*
282279
__m128i tmp = _mm_loadl_epi64( ( __m128i const * )&ff[deltaFract<<2] ); //load 4 16 bit filter coeffs
283280
tmp = _mm_shuffle_epi32(tmp,0x44);
284281
__m256i coeff = _mm256_broadcastsi128_si256(tmp);
285-
__m256i src0 = _mm256_lddqu_si256( ( const __m256i * )&refMain[refMainIndex - 1] );//load 16 16 bit reference Pels -1 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
286-
// __m256i src2 = _mm256_inserti128_si256(src2, _mm_lddqu_si128( ( __m128i const * )&refMain[refMainIndex +4 - 1] ), 0x0);
287-
__m256i src2 = _mm256_castsi128_si256 (_mm_lddqu_si128( ( __m128i const * )&refMain[refMainIndex +4 - 1] ));
288-
__m256i src1 = _mm256_permute2f128_si256 (src0,src0,0x00);
289-
src2 = _mm256_permute2f128_si256 (src2,src2,0x00);
282+
__m256i src1 = _mm256_broadcastsi128_si256( _mm_loadu_si128( ( __m128i const* ) & refMain[refMainIndex - 1] ) );
283+
__m256i src2 = _mm256_broadcastsi128_si256( _mm_loadu_si128( ( __m128i const* ) & refMain[refMainIndex + 4 - 1] ) );
290284
src1 = _mm256_shuffle_epi8(src1,shflmask1); // -1 0 1 2 0 1 2 3 1 2 3 4 2 3 4 5
291285
src2 = _mm256_shuffle_epi8(src2,shflmask1); // 3 4 5 6 4 5 6 7 5 6 7 8 6 7 8 9
292286

@@ -307,9 +301,6 @@ void IntraPredAngleCore_SIMD(int16_t* pDstBuf,const ptrdiff_t dstStride,int16_t*
307301
deltaPos += intraPredAngle;
308302
}
309303
}
310-
311-
312-
313304
#endif
314305
}
315306
else

0 commit comments

Comments
 (0)