-de fichiers (et je sais que y'a que la liste de fichiers qui est contenue dedans)
-pas loin, oui
// right, loop through all the streams we have left, and setup all the uninitialized ones:
for (hh_u32 i = 0; i < m_FieldSetup.Count(); i++)
{
if (m_FieldSetup[i].m_Evaluator == null)
{
SIMD::Float4 zero = SIMD::Float4::Zero();
SIMD::Float4 v128 = SIMD::Float4::LoadUnaligned(m_FieldSetup[i].m_RawDefaultValueStorageU8);
if (v128.MaskNotEqual_Imm4(zero) == 0)
{
m_FieldSetup[i].m_Evaluator = &_FieldEvaluator_ClearToZero;
}
else
{
hh_u32 lookupId = TIntegerTools::Log2(m_Declaration.m_Fields[i].m_StorageSize);
HH_ASSERT(TIntegerTools::IsPowerOfTwo(m_Declaration.m_Fields[i].m_StorageSize));
HH_ASSERT(lookupId < sizeof(_FieldEvaluatorsClear) / sizeof(_FieldEvaluatorsClear[0]));
m_FieldSetup[i].m_Evaluator = _FieldEvaluatorsClear[lookupId];
}
}
}
// Fields that the script only writes to (and never reads) do not need a setup evaluator: drop it.
if (m_RuntimeSpawnEvaluator != null)
{
	TMemoryView<TPair<CGuid, hh_u32> >	fieldUsages = m_RuntimeSpawnEvaluator->FieldMappings();
	if (!fieldUsages.Empty())
	{
		// the mappings are indexed like 'm_FieldSetup', one entry per field
		HH_ASSERT(fieldUsages.Count() == m_FieldSetup.Count());
		for (hh_u32 fieldIdx = 0; fieldIdx < m_FieldSetup.Count(); fieldIdx++)
		{
			// keep the evaluator unless the usage flags are exactly 'write' (write access only, no read)
			if (fieldUsages[fieldIdx].Second() != CParticleEvaluator::FieldUsageFlag_Write)
				continue;

			// debug trace, disabled:
			// CLog::Log(HH_INFO, "#### clearing redundant setup evaluator for field '%s'", m_Declaration.m_Fields[fieldIdx].m_Name.Data());

			// The script overwrites whatever the setup evaluator would have produced, so the evaluator is redundant.
			// NOTE: this wouldn't be correct if we had non-pure stream functions, but it's not the case.
			m_FieldSetup[fieldIdx].m_Evaluator = null;
		}
	}
}
// SSE2 fixed-point bilinear interpolation loop, compiled only when USE_EXPANDED_TEXTURE_FOR_BILERP is defined.
// Each iteration reads 4 floats from 'srcTexcoords' (two samples, named A and B below) and writes
// 8 floats to 'dstSamples' (one 4-float result per sample).
// NOTE(review): 'dstStop', 'dstSamples', 'srcTexcoords', 'txDims', 'txDimsV', 'wrapMask', 'yStrideShift',
// 'k16384', '_Inv255' and 'm_PixelsX4' are defined outside this excerpt; their exact values are not visible here.
#if (HH_SIMD == HH_SIMD_SSE) && (HH_SIMD_VERSION >= HH_SSE2) && defined(USE_EXPANDED_TEXTURE_FOR_BILERP)
// process 2 samples (8 texels) per iteration
// Stop 8 floats before 'dstStop' so the two 4-float stores of an iteration stay below it
// (presumably 'dstStop' is one past the last writable float -- TODO confirm).
float *dstStopX2 = dstStop - 2*4;
while (dstSamples <= dstStopX2)
{
// load the texcoords of both samples (4 floats) and advance the source cursor
SIMD::Float4 xy_AB = SIMD::Float4::LoadUnaligned(srcTexcoords + 0);
srcTexcoords += 4;
// fractional part of the coordinates scaled by 'txDims': used further down as the interpolation weights
SIMD::Float4 cursor_AB = SIMD::Frac(xy_AB * txDims);
// note: the integer coordinates use 'txDimsV', not 'txDims' (presumably a differently-scaled variant -- TODO confirm)
xy_AB *= txDimsV;
// truncate to 32-bit integers, then wrap by masking with 'wrapMask'
__m128i xy_i_AB = _mm_cvttps_epi32(xy_AB._xmm());
__m128i xy_iw_AB = _mm_and_si128(xy_i_AB, wrapMask._xmm_epi32());
// pack the low 16 bits of X and Y of each sample into adjacent 16-bit words
__m128i packed_A = _mm_shufflelo_epi16(xy_iw_AB, HH_MM_SHUFFLE(x, z, y, w)); // [...][0000][0Yyy][0000][Xxxx] -> [...][0000][0000][0Yyy][Xxxx]
__m128i packed_B = _mm_shufflehi_epi16(xy_iw_AB, HH_MM_SHUFFLE(x, z, y, w)); // [0000][0Yyy][0000][Xxxx][...] -> [0000][0000][0Yyy][Xxxx][...]
// swizzle B's 32-bit lanes so its packed word can be read with _mm_cvtsi128_si32, which extracts the lowest 32 bits
packed_B = _mm_shuffle_epi32(packed_B, HH_MM_SHUFFLE(z, w, x, y));
// the packed [Y:X] word shifted right by 'yStrideShift' is used as the cell index
hh_u32 cell_A = _mm_cvtsi128_si32(packed_A) >> yStrideShift;
hh_u32 cell_B = _mm_cvtsi128_si32(packed_B) >> yStrideShift;
// each cell spans 4 pixels' worth of bytes in 'm_PixelsX4'
hh_u8 *texel_A = m_PixelsX4 + cell_A * (4 * kBGRA8PixelSizeInBytes);
hh_u8 *texel_B = m_PixelsX4 + cell_B * (4 * kBGRA8PixelSizeInBytes);
// scale the fractional weights by 'k16384' before converting them to integers (fixed-point weights)
cursor_AB *= k16384;
// aligned 128-bit loads of the 4 texels of each cell: 'texel_A' / 'texel_B' must be 16-byte aligned
__m128i t0t1t2t3_A = _mm_load_si128(reinterpret_cast<const __m128i*>(texel_A));
__m128i t0t1t2t3_B = _mm_load_si128(reinterpret_cast<const __m128i*>(texel_B));
// integer weights, then replicate them across 16-bit words so they line up with the 16-bit channels below
__m128i t32_AB = _mm_cvttps_epi32(cursor_AB._xmm());
__m128i t16_A = _mm_shufflelo_epi16(t32_AB, HH_MM_SHUFFLE(x,x,z,z)); // xxyy____
__m128i t16_B = _mm_shufflehi_epi16(t32_AB, HH_MM_SHUFFLE(x,x,z,z)); // ____xxyy
// x weights of A and B, broadcast to every 32-bit lane
__m128i tx16_A = _mm_shuffle_epi32(t16_A, HH_MM_SHUFFLE(x,x,x,x));
__m128i tx16_B = _mm_shuffle_epi32(t16_B, HH_MM_SHUFFLE(z,z,z,z));
const __m128i _zero = _mm_setzero_si128();
// y weights of A and B merged into a single register; goes through the float shuffle via the
// '__m128fi' int/float view because _mm_shuffle_ps can pick lanes from two different sources
__m128fi ty16A;
__m128fi ty16B;
ty16A.i = t16_A;
ty16B.i = t16_B;
ty16A.f = _mm_shuffle_ps(ty16A.f, ty16B.f, HH_MM_SHUFFLE(y,y,w,w)); // FIXME: stay on the integer pipeline
__m128i ty16 = ty16A.i;
// widen the 8-bit channels to 16 bits by interleaving with zero: low half = texels 0/1, high half = texels 2/3
__m128i t0t1_A = _mm_unpacklo_epi8(t0t1t2t3_A, _zero); // a c
__m128i t0t1_B = _mm_unpacklo_epi8(t0t1t2t3_B, _zero); // a c
__m128i t2t3_A = _mm_unpackhi_epi8(t0t1t2t3_A, _zero); // b d
__m128i t2t3_B = _mm_unpackhi_epi8(t0t1t2t3_B, _zero); // b d
// first interpolation: t0t1 + (t2t3 - t0t1) * tx
__m128i delta_A = _mm_sub_epi16(t2t3_A, t0t1_A); // { b-a, d-c }
__m128i delta_B = _mm_sub_epi16(t2t3_B, t0t1_B); // { b-a, d-c }
// shift left by 2, then keep the high 16 bits of the product: net effect is (delta * weight) >> 14
delta_A = _mm_slli_epi16(delta_A, 2);
delta_B = _mm_slli_epi16(delta_B, 2);
__m128i t2t3xT_A = _mm_mulhi_epi16(delta_A, tx16_A);
__m128i t2t3xT_B = _mm_mulhi_epi16(delta_B, tx16_B);
__m128fi combined1_A;
__m128fi combined1_B;
combined1_A.i = _mm_add_epi16(t0t1_A, t2t3xT_A);
combined1_B.i = _mm_add_epi16(t0t1_B, t2t3xT_B);
// (disabled alternative: widen the intermediate results to 32 bits)
/*
__m128i combinedA_A = _mm_unpacklo_epi16(combined1_A, _zero); // [aa00bb00cc00dd00]
__m128i combinedB_A = _mm_unpackhi_epi16(combined1_A, _zero); // [ee00ff00gg00hh00]
__m128i combinedA_B = _mm_unpacklo_epi16(combined1_B, _zero);
__m128i combinedB_B = _mm_unpackhi_epi16(combined1_B, _zero);
*/
// regroup the intermediate results of A and B: one register takes the first half of each, the other the second half,
// so the second interpolation handles both samples in a single pass
__m128fi combinedTransposedA;
__m128fi combinedTransposedB;
combinedTransposedA.f = _mm_shuffle_ps(combined1_A.f, combined1_B.f, HH_MM_SHUFFLE(x,y,x,y)); // [aa][bb][cc][dd] [ii][jj][kk][ll]
combinedTransposedB.f = _mm_shuffle_ps(combined1_A.f, combined1_B.f, HH_MM_SHUFFLE(z,w,z,w)); // [ee][ff][gg][hh] [mm][nn][oo][pp]
// second interpolation, same fixed-point scheme as the first one, using the y weights
__m128i delta2 = _mm_sub_epi16(combinedTransposedB.i, combinedTransposedA.i); // { b-a } - { d-c }
delta2 = _mm_slli_epi16(delta2, 2);
__m128i combinedxT = _mm_mulhi_epi16(delta2, ty16);
__m128i finalCombined16 = _mm_add_epi16(combinedTransposedA.i, combinedxT);
// widen the four 16-bit channels of each sample to 32 bits (interleave with zero)
__m128i finalCombined32_A = _mm_unpacklo_epi16(finalCombined16, _zero);
__m128i finalCombined32_B = _mm_unpackhi_epi16(finalCombined16, _zero);
// (disabled alternative: second interpolation done with 32-bit multiplies)
/*
__m128i delta2_A = _mm_sub_epi32(combinedB_A, combinedA_A); // { b-a } - { d-c }
__m128i delta2_B = _mm_sub_epi32(combinedB_B, combinedA_B); // { b-a } - { d-c }
combinedA_A = _mm_slli_epi32(combinedA_A, 14);
combinedA_B = _mm_slli_epi32(combinedA_B, 14);
__m128i combinedxT_A = _mm_mullo_epi32(delta2_A, ty16_A); // [rrrr][0000][gggg][0000][aaaa][0000][aaaa][0000]
__m128i combinedxT_B = _mm_mullo_epi32(delta2_B, ty16_B); // [rrrr][0000][gggg][0000][aaaa][0000][aaaa][0000]
*/
// (disabled alternative: second interpolation done per sample, without regrouping A and B)
/*
__m128i delta2_A = _mm_sub_epi16(combinedB_A, combinedA_A); // { b-a } - { d-c }
__m128i delta2_B = _mm_sub_epi16(combinedB_B, combinedA_B); // { b-a } - { d-c }
delta2_A = _mm_slli_epi16(delta2_A, 2);
delta2_B = _mm_slli_epi16(delta2_B, 2);
__m128i combinedxT_A = _mm_mulhi_epi16(delta2_A, ty16_A); // [rrrr][0000][gggg][0000][aaaa][0000][aaaa][0000]
__m128i combinedxT_B = _mm_mulhi_epi16(delta2_B, ty16_B); // [rrrr][0000][gggg][0000][aaaa][0000][aaaa][0000]
__m128i finalCombined32_A = _mm_add_epi16(combinedA_A, combinedxT_A);
__m128i finalCombined32_B = _mm_add_epi16(combinedA_B, combinedxT_B);
*/
// convert to float and rescale by '_Inv255' (presumably 1/255, mapping 8-bit channels to [0, 1] -- TODO confirm)
SIMD::Float4 fpCombined_A = _mm_cvtepi32_ps(finalCombined32_A);
SIMD::Float4 fpCombined_B = _mm_cvtepi32_ps(finalCombined32_B);
SIMD::Float4 texelRGBA_A = fpCombined_A * _Inv255/*_16384*/;
SIMD::Float4 texelRGBA_B = fpCombined_B * _Inv255/*_16384*/;
// store both results (presumably 'dstSamples' has to be 16-byte aligned for StoreAligned16 -- TODO confirm) and advance by 8 floats
texelRGBA_A.StoreAligned16(dstSamples + 0);
texelRGBA_B.StoreAligned16(dstSamples + 4);
dstSamples += 8;
}
#endif
vince (./17096) :
(vide)