@@ -242,6 +242,103 @@ static void SDL_TARGETING("sse2") Blit888to888SurfaceAlphaSSE2(SDL_BlitInfo *inf
242242
243243#endif
244244
245+ #ifdef SDL_LSX_INTRINSICS
246+
247+ static void SDL_TARGETING ("lsx" ) Blit8888to8888PixelAlphaSwizzleLSX (SDL_BlitInfo * info ) // Blends 4-byte src pixels onto 4-byte dst pixels using each src pixel's own alpha, reordering channels from srcfmt to dstfmt; 4 pixels per step with LSX, scalar tail for the rest.
248+ { // info supplies the src/dst pointers, the dst_w x dst_h size, the per-row skips and both pixel formats.
249+ int width = info -> dst_w ; // pixels per row
250+ int height = info -> dst_h ; // number of rows
251+ Uint8 * src = info -> src ;
252+ int srcskip = info -> src_skip ; // bytes added to src after each row
253+ Uint8 * dst = info -> dst ;
254+ int dstskip = info -> dst_skip ; // bytes added to dst after each row
255+ const SDL_PixelFormatDetails * srcfmt = info -> src_fmt ;
256+ const SDL_PixelFormatDetails * dstfmt = info -> dst_fmt ;
257+ bool fill_alpha = !dstfmt -> Amask ; // true when dst has no alpha mask; the dstAmask bits are then forced on in every written pixel
258+ Uint32 dstAmask , dstAshift ; // dstAshift is only passed to the helper below and not otherwise read here
259+ const Uint8 offsets [] = {0 , 0 , 0 , 0 , 4 , 4 , 4 , 4 , 8 , 8 , 8 , 8 , 12 , 12 , 12 , 12 };
260+ // Fills in dstAmask/dstAshift; presumably the bits of dst's alpha (or unused) byte -- confirm against the helper's definition.
261+ SDL_Get8888AlphaMaskAndShift (dstfmt , & dstAmask , & dstAshift );
262+ // Loop-invariant vector constants for the blend arithmetic below.
263+ const __m128i const_0xff00 = __lsx_vreplgr2vr_h (0xff00 ); // XOR with this flips the high byte of each 16-bit lane (x ^ 0xff == 255 - x)
264+ const __m128i const_128 = __lsx_vreplgr2vr_b ((Uint8 )128 ); // bias that moves unsigned bytes into signed range
265+ const __m128i const_32641 = __lsx_vreplgr2vr_h (32641 ); // 32641 == 1 + 128*255
266+ const __m128i const_257 = __lsx_vreplgr2vr_h (257 ); // (x * 257) >> 16 approximates x / 255
267+
268+ // The byte offsets for the start of each pixel
269+ const __m128i mask_offsets = __lsx_vld (offsets , 0 );
270+ // Shuffle indices per pixel: each dst channel's byte position holds the index of the src byte carrying that channel (shift >> 3 is a byte index); mask_offsets rebases them for pixels 1..3.
271+ const __m128i convert_mask = __lsx_vadd_w (
272+ __lsx_vreplgr2vr_w (
273+ ((srcfmt -> Rshift >> 3 ) << dstfmt -> Rshift ) |
274+ ((srcfmt -> Gshift >> 3 ) << dstfmt -> Gshift ) |
275+ ((srcfmt -> Bshift >> 3 ) << dstfmt -> Bshift )),
276+ mask_offsets );
277+ // alpha_splat_mask: shuffle indices that copy each pixel's src alpha byte into all four of its bytes; alpha_fill_mask: dstAmask replicated into every pixel.
278+ const __m128i alpha_splat_mask = __lsx_vadd_b (__lsx_vreplgr2vr_b (srcfmt -> Ashift >> 3 ), mask_offsets );
279+ const __m128i alpha_fill_mask = __lsx_vreplgr2vr_w ((int )dstAmask );
280+
281+ while (height -- ) {
282+ int i = 0 ;
283+ // Vector path: 4 pixels (16 bytes) per iteration.
284+ for (; i + 4 <= width ; i += 4 ) {
285+ __m128i src128 = __lsx_vld (src , 0 );
286+ __m128i dst128 = __lsx_vld (dst , 0 );
287+ // Splat the src alpha across each pixel, then reorder the src channels into dst byte order (both shuffle inputs are the same register).
288+ __m128i srcA = __lsx_vshuf_b (src128 , src128 , alpha_splat_mask );
289+ src128 = __lsx_vshuf_b (src128 , src128 , convert_mask );
290+ // Set the dstAmask bits in the reordered src pixels.
291+ src128 = __lsx_vor_v (src128 , alpha_fill_mask );
292+ // Duplicate every alpha byte into both bytes of a 16-bit lane (low and high 8 bytes handled separately).
293+ __m128i srca_lo = __lsx_vilvl_b (srcA , srcA );
294+ __m128i srca_hi = __lsx_vilvh_b (srcA , srcA );
295+ // Flip the high byte of each lane so a lane holds the pair (srcA, 255 - srcA).
296+ srca_lo = __lsx_vxor_v (srca_lo , const_0xff00 );
297+ srca_hi = __lsx_vxor_v (srca_hi , const_0xff00 );
298+ // Subtract 128 from every byte (wrapping) so the pixel bytes can serve as the signed multiplicand below.
299+ src128 = __lsx_vsub_b (src128 , const_128 );
300+ dst128 = __lsx_vsub_b (dst128 , const_128 );
301+ // Pair each src byte with the matching dst byte, then per 16-bit lane: srcA*(src-128) + (255-srcA)*(dst-128) = srcA*src + (255-srcA)*dst - 128*255 (even-byte product plus odd-byte product, unsigned x signed).
302+ __m128i tmp = __lsx_vilvl_b (dst128 , src128 );
303+ __m128i dst_lo = __lsx_vsadd_h (__lsx_vmulwev_h_bu_b (srca_lo , tmp ), __lsx_vmulwod_h_bu_b (srca_lo , tmp ));
304+ tmp = __lsx_vilvh_b (dst128 , src128 );
305+ __m128i dst_hi = __lsx_vsadd_h (__lsx_vmulwev_h_bu_b (srca_hi , tmp ), __lsx_vmulwod_h_bu_b (srca_hi , tmp ));
306+ // Add 1 + 128*255: the 128*255 restores what the bias removed, the +1 belongs to the divide-by-255 approximation.
307+ dst_lo = __lsx_vadd_h (dst_lo , const_32641 );
308+ dst_hi = __lsx_vadd_h (dst_hi , const_32641 );
309+ // Keep the high 16 bits of x * 257, i.e. approximately x / 255.
310+ dst_lo = __lsx_vmuh_hu (dst_lo , const_257 );
311+ dst_hi = __lsx_vmuh_hu (dst_hi , const_257 );
312+ // Narrow the 16-bit results of both halves back into 16 unsigned bytes (shift amount 0, unsigned saturation).
313+ dst128 = __lsx_vssrarni_bu_h (dst_hi , dst_lo , 0 );
314+ if (fill_alpha ) {
315+ dst128 = __lsx_vor_v (dst128 , alpha_fill_mask );
316+ }
317+ __lsx_vst (dst128 , dst , 0 );
318+
319+ src += 16 ;
320+ dst += 16 ;
321+ }
322+ // Scalar tail: the remaining (fewer than 4) pixels of the row, one at a time.
323+ for (; i < width ; ++ i ) {
324+ Uint32 src32 = * (Uint32 * )src ;
325+ Uint32 dst32 = * (Uint32 * )dst ;
326+ ALPHA_BLEND_SWIZZLE_8888 (src32 , dst32 , srcfmt , dstfmt ); // presumably leaves the blended pixel in dst32 -- confirm against the macro definition
327+ if (fill_alpha ) {
328+ dst32 |= dstAmask ;
329+ }
330+ * (Uint32 * )dst = dst32 ;
331+ src += 4 ;
332+ dst += 4 ;
333+ }
334+ // Step over the per-row skip bytes to reach the next row.
335+ src += srcskip ;
336+ dst += dstskip ;
337+ }
338+ }
339+
340+ #endif
341+
245342// fast RGB888->(A)RGB888 blending with surface alpha=128 special case
246343static void BlitRGBtoRGBSurfaceAlpha128 (SDL_BlitInfo * info )
247344{
@@ -1402,6 +1499,11 @@ SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface *surface)
14021499 return Blit8888to8888PixelAlphaSwizzleSSE41 ;
14031500 }
14041501#endif
1502+ #ifdef SDL_LSX_INTRINSICS
1503+ if (SDL_HasLSX ()) {
1504+ return Blit8888to8888PixelAlphaSwizzleLSX ;
1505+ }
1506+ #endif
14051507#if defined(SDL_NEON_INTRINSICS ) && (__ARM_ARCH >= 8 )
14061508 // To prevent "unused function" compiler warnings/errors
14071509 (void )Blit8888to8888PixelAlpha ;
0 commit comments