@@ -3858,127 +3858,6 @@ static etcpak_force_inline uint64_t ProcessAlpha_ETC2( const uint8_t* src )
3858
3858
#endif
3859
3859
}
3860
3860
3861
- void CompressEtc1Alpha ( const uint32_t * src, uint64_t * dst, uint32_t blocks, size_t width )
3862
- {
3863
- int w = 0 ;
3864
- uint32_t buf[4 *4 ];
3865
- do
3866
- {
3867
- #ifdef __SSE4_1__
3868
- __m128 px0 = _mm_castsi128_ps ( _mm_loadu_si128 ( (__m128i*)( src + width * 0 ) ) );
3869
- __m128 px1 = _mm_castsi128_ps ( _mm_loadu_si128 ( (__m128i*)( src + width * 1 ) ) );
3870
- __m128 px2 = _mm_castsi128_ps ( _mm_loadu_si128 ( (__m128i*)( src + width * 2 ) ) );
3871
- __m128 px3 = _mm_castsi128_ps ( _mm_loadu_si128 ( (__m128i*)( src + width * 3 ) ) );
3872
-
3873
- _MM_TRANSPOSE4_PS ( px0, px1, px2, px3 );
3874
-
3875
- __m128i c0 = _mm_castps_si128 ( px0 );
3876
- __m128i c1 = _mm_castps_si128 ( px1 );
3877
- __m128i c2 = _mm_castps_si128 ( px2 );
3878
- __m128i c3 = _mm_castps_si128 ( px3 );
3879
-
3880
- __m128i mask = _mm_setr_epi32 ( 0x03030303 , 0x07070707 , 0x0b0b0b0b , 0x0f0f0f0f );
3881
- __m128i p0 = _mm_shuffle_epi8 ( c0, mask );
3882
- __m128i p1 = _mm_shuffle_epi8 ( c1, mask );
3883
- __m128i p2 = _mm_shuffle_epi8 ( c2, mask );
3884
- __m128i p3 = _mm_shuffle_epi8 ( c3, mask );
3885
-
3886
- _mm_store_si128 ( (__m128i*)(buf + 0 ), p0 );
3887
- _mm_store_si128 ( (__m128i*)(buf + 4 ), p1 );
3888
- _mm_store_si128 ( (__m128i*)(buf + 8 ), p2 );
3889
- _mm_store_si128 ( (__m128i*)(buf + 12 ), p3 );
3890
-
3891
- src += 4 ;
3892
- #else
3893
- auto ptr = buf;
3894
- for ( int x=0 ; x<4 ; x++ )
3895
- {
3896
- unsigned int a = *src >> 24 ;
3897
- *ptr++ = a | ( a << 8 ) | ( a << 16 );
3898
- src += width;
3899
- a = *src >> 24 ;
3900
- *ptr++ = a | ( a << 8 ) | ( a << 16 );
3901
- src += width;
3902
- a = *src >> 24 ;
3903
- *ptr++ = a | ( a << 8 ) | ( a << 16 );
3904
- src += width;
3905
- a = *src >> 24 ;
3906
- *ptr++ = a | ( a << 8 ) | ( a << 16 );
3907
- src -= width * 3 - 1 ;
3908
- }
3909
- #endif
3910
- if ( ++w == width/4 )
3911
- {
3912
- src += width * 3 ;
3913
- w = 0 ;
3914
- }
3915
- *dst++ = ProcessRGB ( (uint8_t *)buf );
3916
- }
3917
- while ( --blocks );
3918
- }
3919
-
3920
- void CompressEtc2Alpha ( const uint32_t * src, uint64_t * dst, uint32_t blocks, size_t width, bool useHeuristics )
3921
- {
3922
- int w = 0 ;
3923
- uint32_t buf[4 *4 ];
3924
- do
3925
- {
3926
- #ifdef __SSE4_1__
3927
- __m128 px0 = _mm_castsi128_ps ( _mm_loadu_si128 ( (__m128i*)( src + width * 0 ) ) );
3928
- __m128 px1 = _mm_castsi128_ps ( _mm_loadu_si128 ( (__m128i*)( src + width * 1 ) ) );
3929
- __m128 px2 = _mm_castsi128_ps ( _mm_loadu_si128 ( (__m128i*)( src + width * 2 ) ) );
3930
- __m128 px3 = _mm_castsi128_ps ( _mm_loadu_si128 ( (__m128i*)( src + width * 3 ) ) );
3931
-
3932
- _MM_TRANSPOSE4_PS ( px0, px1, px2, px3 );
3933
-
3934
- __m128i c0 = _mm_castps_si128 ( px0 );
3935
- __m128i c1 = _mm_castps_si128 ( px1 );
3936
- __m128i c2 = _mm_castps_si128 ( px2 );
3937
- __m128i c3 = _mm_castps_si128 ( px3 );
3938
-
3939
- __m128i mask = _mm_setr_epi32 ( 0x03030303 , 0x07070707 , 0x0b0b0b0b , 0x0f0f0f0f );
3940
- __m128i p0 = _mm_shuffle_epi8 ( c0, mask );
3941
- __m128i p1 = _mm_shuffle_epi8 ( c1, mask );
3942
- __m128i p2 = _mm_shuffle_epi8 ( c2, mask );
3943
- __m128i p3 = _mm_shuffle_epi8 ( c3, mask );
3944
-
3945
- _mm_store_si128 ( (__m128i*)(buf + 0 ), p0 );
3946
- _mm_store_si128 ( (__m128i*)(buf + 4 ), p1 );
3947
- _mm_store_si128 ( (__m128i*)(buf + 8 ), p2 );
3948
- _mm_store_si128 ( (__m128i*)(buf + 12 ), p3 );
3949
-
3950
- src += 4 ;
3951
- #else
3952
- auto ptr = buf;
3953
- for ( int x=0 ; x<4 ; x++ )
3954
- {
3955
- unsigned int a = *src >> 24 ;
3956
- *ptr++ = a | ( a << 8 ) | ( a << 16 );
3957
- src += width;
3958
- a = *src >> 24 ;
3959
- *ptr++ = a | ( a << 8 ) | ( a << 16 );
3960
- src += width;
3961
- a = *src >> 24 ;
3962
- *ptr++ = a | ( a << 8 ) | ( a << 16 );
3963
- src += width;
3964
- a = *src >> 24 ;
3965
- *ptr++ = a | ( a << 8 ) | ( a << 16 );
3966
- src -= width * 3 - 1 ;
3967
- }
3968
- #endif
3969
- if ( ++w == width/4 )
3970
- {
3971
- src += width * 3 ;
3972
- w = 0 ;
3973
- }
3974
- *dst++ = ProcessRGB_ETC2 ( (uint8_t *)buf, useHeuristics );
3975
- }
3976
- while ( --blocks );
3977
- }
3978
-
3979
- #include < chrono>
3980
- #include < thread>
3981
-
3982
3861
void CompressEtc1Rgb ( const uint32_t * src, uint64_t * dst, uint32_t blocks, size_t width )
3983
3862
{
3984
3863
int w = 0 ;
0 commit comments