//----------------------------------------------------------------------------
// Capacity of the 'm_Pages' array held by each vertex-buffer build task below:
// a single task instance processes at most this many particle pages.
static const hh_u32 _PageBatchSize = 4;
//----------------------------------------------------------------------------
// Asynchronous job that writes vertex positions for a batch of particle pages into 'm_DstPos'.
//
// For each page in [0, m_PagesCount):
// - when 'g_RenderMode' is not 'RenderMode_Point', the page's position / size / (optional) rotation
//   streams are handed to 'm_Billboarder->Align()' together with '*m_BillboardingMatrix';
// - otherwise the positions are copied as-is, one vertex per particle, using a SIMD fast path
//   when the source stream has a 16-byte stride and a 16-byte aligned base address.
// '*m_DoneCount' is incremented once, after all pages of the batch have been processed.
class CParticleTask_BuildVBPositions : public CAsynchronousJob
{
protected:
// Counter incremented once at the end of '_VirtualLaunch'.
TAtomic<hh_u32> *m_DoneCount;
// Pages of this batch; only the first 'm_PagesCount' entries are read by '_VirtualLaunch'.
CParticlePage *m_Pages[_PageBatchSize];
hh_u32 m_PagesCount;
// Destination positions. Advanced after each page, so it always starts at the next page's first vertex.
TStridedMemoryView<CFloat3, sizeof(CFloat4)> m_DstPos;
/*const*/ CScreenBillboarderQuad *m_Billboarder;
// Stream ids used to fetch the per-particle attributes from each page.
CGuid m_PositionId;
CGuid m_SizeId;
// Optional: when not 'Valid()', an empty rotation view is passed to the billboarder.
CGuid m_RotationId;
const CFloat4x4 *m_BillboardingMatrix;
// Job entry point: fills the positions of every page in the batch, then bumps '*m_DoneCount'.
// Always returns true. 'threadContext' is not used.
virtual bool _VirtualLaunch(SWorkerThreadContext &threadContext) override
{
for (hh_u32 p = 0; p < m_PagesCount; p++)
{
CParticlePage *pp = m_Pages[p];
HH_ASSERT(pp != null);
TStridedMemoryView<const CFloat3> attribPos = pp->StreamForReading<CFloat3>(m_PositionId);
if (g_RenderMode != RenderMode_Point)
{
// billboards
TStridedMemoryView<const float> attribSize = pp->StreamForReading<float>(m_SizeId);
TMemoryView<const float> attribRotation = m_RotationId.Valid() ? pp->StreamForReading<float>(m_RotationId).ToMemoryViewIFP() : TMemoryView<const float>();
// All streams must describe the same number of particles (rotation may be absent).
HH_ASSERT(!attribPos.Empty());
HH_ASSERT(attribSize.Count() == attribPos.Count());
HH_ASSERT(attribRotation.Empty() || attribRotation.Count() == attribPos.Count());
// NOTE(review): the declaration of 'bbContext' looks truncated here ('CBillboarder:' followed by an
// empty line). As written, 'CBillboarder:' parses as a label and 'bbContext' is never declared.
// Restore the full 'CBillboarder::<context type> bbContext;' declaration from the original file.
CBillboarder:

// Output window: one billboard (BillboardVertexCount() vertices) per particle of this page.
bbContext.m_OutPositions = m_DstPos.MakeSubView(0, attribPos.Count() * m_Billboarder->BillboardVertexCount());
bbContext.m_Centers = attribPos;
bbContext.m_Rads = attribSize;
bbContext.m_Rotations = attribRotation;
m_Billboarder->Align(*m_BillboardingMatrix, bbContext);
// Move the destination past this page's vertices.
// NOTE(review): uses pp->ParticleCount() whereas the sub-view above uses attribPos.Count();
// presumably both are equal -- confirm.
m_DstPos += pp->ParticleCount() * m_Billboarder->BillboardVertexCount();
}
else
{
// points
// copy the positions inside the target vertex buffer:
if (attribPos.Stride() == 0x10 && Mem::IsAligned<0x10>(attribPos.Data())) // optimized case, when the source stream is SIMD-friendly
{
// Particles are copied 4 at a time, the remaining (count % 4) ones are handled by the tail loop.
static const hh_u32 parallelReduce = 4;
// hh_u32 parallelCount = attribPos.Count() / parallelReduce;
hh_u32 serialCount = attribPos.Count() % parallelReduce;
const hh_u8 * /*HH_RESTRICT*/ src = reinterpret_cast<const hh_u8*>(attribPos.Data()); // stupid VC2010 interleaves load/stores when __restrict is used.. god dammit
hh_u8 * /*HH_RESTRICT*/ dst = reinterpret_cast<hh_u8*>(m_DstPos.Data());
const hh_u8 *srcStop = src + attribPos.Count() * 0x10;
// 'src' is biased forward by one full group (4 * 16 bytes): 'src <= srcStop' then means
// "a complete group of 4 is still available", and every load subtracts the bias again.
src += parallelReduce * 0x10;
while (src <= srcStop)
{
SIMD::Float4 v0 = SIMD::Float4::LoadAligned16(src + 0x00 - parallelReduce * 0x10);
SIMD::Float4 v1 = SIMD::Float4::LoadAligned16(src + 0x10 - parallelReduce * 0x10);
SIMD::Float4 v2 = SIMD::Float4::LoadAligned16(src + 0x20 - parallelReduce * 0x10);
SIMD::Float4 v3 = SIMD::Float4::LoadAligned16(src + 0x30 - parallelReduce * 0x10);
// A whole Float4 is stored per vertex, so each destination slot must be able to hold one.
HH_STATIC_ASSERT(kVertexDynamicSizeInBytesP >= sizeof(CFloat4));
// NOTE(review): destination addressing uses 'kVertexDynamicSizeInBytesP' as the byte stride instead of
// m_DstPos.Stride(), and aligned stores -- assumes both match and that 'dst' is 16-byte aligned; confirm.
v0.StoreAligned16(dst + 0 * kVertexDynamicSizeInBytesP);
v1.StoreAligned16(dst + 1 * kVertexDynamicSizeInBytesP);
v2.StoreAligned16(dst + 2 * kVertexDynamicSizeInBytesP);
v3.StoreAligned16(dst + 3 * kVertexDynamicSizeInBytesP);
src += parallelReduce * 0x10;
dst += parallelReduce * kVertexDynamicSizeInBytesP;
}
// Tail: 'src' still carries the one-group bias from above, hence the subtraction in the load.
while (serialCount-- != 0)
{
SIMD::Float4 v = SIMD::Float4::LoadAligned16(src - parallelReduce * 0x10);
v.StoreAligned16(dst);
src += 0x10;
dst += kVertexDynamicSizeInBytesP;
}
}
else // generic version
{
// Element-wise copy through the strided views; works for any source stride / alignment.
for (hh_u32 i = 0; i < attribPos.Count(); i++)
{
m_DstPos[i] = attribPos[i];
}
}
// One vertex per particle in point mode.
m_DstPos += pp->ParticleCount();
}
}
// Signal that this batch is done.
++(*m_DoneCount);
return true;
}
public:
// Stores the job parameters. 'billboarder' and 'billboardingMatrix' are kept by address, and
// 'doneCount' is kept as a pointer: all three are dereferenced later, in '_VirtualLaunch'.
// NOTE(review): all '_PageBatchSize' entries of 'pages' are copied, including the ones at or beyond
// 'pageCount', and 'pageCount' is not checked against '_PageBatchSize' -- callers presumably
// guarantee pageCount <= _PageBatchSize; confirm.
CParticleTask_BuildVBPositions(TAtomic<hh_u32> *doneCount, CParticlePage *(&pages)[_PageBatchSize], hh_u32 pageCount, const TStridedMemoryView<CFloat3, sizeof(CFloat4)> &dstPos, /*const*/ CScreenBillboarderQuad &billboarder, CGuid positionId, CGuid sizeId, CGuid rotationId, const CFloat4x4 &billboardingMatrix)
: m_DoneCount(doneCount)
, m_PagesCount(pageCount)
, m_DstPos(dstPos)
, m_Billboarder(&billboarder)
, m_PositionId(positionId)
, m_SizeId(sizeId)
, m_RotationId(rotationId)
, m_BillboardingMatrix(&billboardingMatrix)
{
for (hh_u32 i = 0; i < _PageBatchSize; i++)
{
m_Pages[i] = pages[i];
}
}
// Nothing to release: the task only holds pointers and views.
~CParticleTask_BuildVBPositions()
{
}
};
// NOTE(review): presumably declares the ref-counted pointer type(s) for this class -- confirm against the macro definition.
HH_DECLARE_REFPTRCLASS(ParticleTask_BuildVBPositions);
//----------------------------------------------------------------------------
class CParticleTask_BuildVBColors : public CAsynchronousJob
{
protected:
TAtomic<hh_u32> *m_DoneCount;
CParticlePage *m_Pages[_PageBatchSize];
hh_u32 m_PagesCount;
TStridedMemoryView<CFloat4> m_DstCol;
CGuid m_ColorId;
static HH_ALIGN(0x10) CFloat4 m_ColWhiteA16;
virtual bool _VirtualLaunch(SWorkerThreadContext &threadContext) override
{
for (hh_u32 p = 0; p < m_PagesCount; p++)
{
CParticlePage *pp = m_Pages[p];
HH_ASSERT(pp != null);
TStridedMemoryView<const CFloat4> attribCol = m_ColorId.Valid() ? pp->StreamForReading<CFloat4>(m_ColorId) : TStridedMemoryView<const CFloat4>(&m_ColWhiteA16, pp->ParticleCount(), 0);
const CFloat4 * HH_RESTRICT col = attribCol.Data();
hh_u8 * HH_RESTRICT dst = reinterpret_cast<hh_u8*>(m_DstCol.Data());
hh_u8 * HH_RESTRICT stop = dst + attribCol.Count() * 4 * kVertexDynamicSizeInBytesC; // not really necessary to __restrict that one
// fill colors:
if (attribCol.Stride() == 0x10)
{
while (dst + 2 * 4 * kVertexDynamicSizeInBytesC <= stop)
{
SIMD::Float4 rgba32f_0 = SIMD::Float4::LoadAligned16(col, 0x00);
SIMD::Float4 rgba32f_1 = SIMD::Float4::LoadAligned16(col, 0x10);
SIMD::Float4 bgra32f_0 = rgba32f_0.Swizzle<2,1,0,3>();
SIMD::Float4 bgra32f_1 = rgba32f_1.Swizzle<2,1,0,3>();
SIMD::Float4 color_0 = SIMD::Converters::Float_0_1::Ubyte_x4_Broadcasted(bgra32f_0);
SIMD::Float4 color_1 = SIMD::Converters::Float_0_1::Ubyte_x4_Broadcasted(bgra32f_1);
color_0.StoreAligned16(dst, 0x00);
color_1.StoreAligned16(dst, 0x10);
dst += 2 * 4 * kVertexDynamicSizeInBytesC;
col += 2;
}
while (dst < stop)
{
SIMD::Float4 rgba32f = SIMD::Float4::LoadAligned16(col);
SIMD::Float4 bgra32f = rgba32f.Swizzle<2,1,0,3>();
SIMD::Float4 color = SIMD::Converters::Float_0_1::Ubyte_x4_Broadcasted(bgra32f);
color.StoreAligned16(dst);
dst += 1 * 4 * kVertexDynamicSizeInBytesC;
col += 1;
}
}
else
{
HH_ASSERT(attribCol.Stride() == 0);
SIMD::Float4 rgba32f = SIMD::Float4::LoadAligned16(attribCol.Data());
SIMD::Float4 bgra32f = rgba32f.Swizzle<2,1,0,3>();
SIMD::Float4 colorX4 = SIMD::Converters::Float_0_1::Ubyte_x4_Broadcasted(bgra32f);
while (dst < stop)
{
colorX4.StoreAligned16(dst);
dst += 4 * kVertexDynamicSizeInBytesC;
}
}
m_DstCol += pp->ParticleCount() * 4; // hardcoded: 4 vertices per particle
}
++(*m_DoneCount);
return true;
}
public:
CParticleTask_BuildVBColors(TAtomic<hh_u32> *doneCount, CParticlePage *(&pages)[_PageBatchSize], hh_u32 pageCount, const TStridedMemoryView<CFloat4> &dstCol, CGuid colorId)
: m_DoneCount(doneCount)
, m_PagesCount(pageCount)
, m_DstCol(dstCol)
, m_ColorId(colorId)
{
for (hh_u32 i = 0; i < _PageBatchSize; i++)
{
m_Pages[i] = pages[i];
}
}
~CParticleTask_BuildVBColors()
{
}
};
HH_DECLARE_REFPTRCLASS(ParticleTask_BuildVBColors);
HH_ALIGN(0x10) CFloat4 CParticleTask_BuildVBColors::m_ColWhiteA16 = CFloat4(1.0f);
//----------------------------------------------------------------------------
//	Asynchronous job that fills the texture-coordinate part of the vertex buffer for a batch of particle pages.
//
//	Texcoords are only written when 'g_RenderMode' is 'RenderMode_BillboardTextured'. In that mode, for every
//	page in [0, m_PagesCount), the float texture-id stream identified by 'm_TextureId' is mapped to atlas
//	rectangles through 'm_Billboarder->FillTexcoordsFromAtlas()'; pages without that stream are left untouched.
//	The destination view is advanced by 'BillboardVertexCount()' vertices per particle after each page.
//	'*m_DoneCount' is incremented once per launch, whatever the render mode.
class CParticleTask_BuildVBTexcoords : public CAsynchronousJob
{
protected:
	TAtomic<hh_u32>				*m_DoneCount;				// incremented once at the end of '_VirtualLaunch'
	CParticlePage				*m_Pages[_PageBatchSize];	// entries at or beyond 'm_PagesCount' are null
	hh_u32						m_PagesCount;				// number of valid entries in 'm_Pages', never above '_PageBatchSize'
	TStridedMemoryView<CFloat2>	m_DstTex;					// destination texcoords, advanced after each page
	/*const*/ CScreenBillboarderQuad	*m_Billboarder;
	CGuid						m_TextureId;				// id of the per-particle texture-id stream

	// Atlas description handed to the 'TRectangleMapper<float>' built in '_VirtualLaunch'.
	TMemoryView<const TRectangleMapper<float>::TCorners>	m_RectangleMapperF32;
	TMemoryView<const CGuid>								m_RectangleMapperIDs;

	//	Job entry point: writes the texcoords of every page in the batch (textured-billboard mode only),
	//	then bumps '*m_DoneCount'. Always returns true. 'threadContext' is not used.
	virtual bool	_VirtualLaunch(SWorkerThreadContext &threadContext) override
	{
		if (g_RenderMode == RenderMode_BillboardTextured)
		{
			for (hh_u32 p = 0; p < m_PagesCount; p++)
			{
				CParticlePage	*pp = m_Pages[p];
				HH_ASSERT(pp != null);

				TStridedMemoryView<const float>	attribTexId = pp->StreamForReading<float>(m_TextureId);
				if (!attribTexId.Empty())
				{
					// Hard-wired: the atlas path is always taken. The non-atlas branch is kept as the
					// alternative to switch to if atlas usage ever becomes optional.
					const bool	hasAtlas = true;
					if (!hasAtlas)
					{
						m_Billboarder->FillTexcoords(m_DstTex);
					}
					else
					{
						HH_ASSERT(!m_RectangleMapperF32.Empty());
						TRectangleMapper<float>	mapper(m_RectangleMapperF32, m_RectangleMapperIDs);
						// FIXME: make 'TextureID' an integer stream, and patch the necessary particle scripts in the resource directories ?
						m_Billboarder->FillTexcoordsFromAtlas(m_DstTex, attribTexId, mapper);
					}
				}

				// Advance even when nothing was written, so the next page lands on its own vertices.
				m_DstTex += pp->ParticleCount() * m_Billboarder->BillboardVertexCount();
			}
		}

		// Signal that this batch is done.
		++(*m_DoneCount);
		return true;
	}

public:
	//	doneCount:			counter incremented once when the job has run; dereferenced in '_VirtualLaunch'.
	//	pages:				batch storage; only the first 'pageCount' entries are read.
	//	pageCount:			number of valid entries in 'pages', expected to be <= '_PageBatchSize'.
	//	dstTex:				destination view, positioned on the first vertex of the first page.
	//	billboarder:		kept by address and used in '_VirtualLaunch'.
	//	textureId:			id of the per-particle texture-id stream.
	//	rectangleMapperF32,
	//	rectangleMapperIDs:	views forwarded to the 'TRectangleMapper<float>' used for atlas lookups.
	CParticleTask_BuildVBTexcoords(TAtomic<hh_u32> *doneCount, CParticlePage *(&pages)[_PageBatchSize], hh_u32 pageCount, const TStridedMemoryView<CFloat2> &dstTex, /*const*/ CScreenBillboarderQuad &billboarder, CGuid textureId, const TMemoryView<const TRectangleMapper<float>::TCorners> &rectangleMapperF32, const TMemoryView<const CGuid> &rectangleMapperIDs)
	:	m_DoneCount(doneCount)
	,	m_PagesCount(pageCount <= _PageBatchSize ? pageCount : _PageBatchSize)	// never index past 'm_Pages'
	,	m_DstTex(dstTex)
	,	m_Billboarder(&billboarder)
	,	m_TextureId(textureId)
	,	m_RectangleMapperF32(rectangleMapperF32)
	,	m_RectangleMapperIDs(rectangleMapperIDs)
	{
		HH_ASSERT(pageCount <= _PageBatchSize);
		// Only the valid entries are read from the caller's array: the remaining slots may not have
		// been initialized by the caller, so they are reset to null instead of being copied.
		for (hh_u32 i = 0; i < _PageBatchSize; i++)
		{
			if (i < m_PagesCount)
				m_Pages[i] = pages[i];
			else
				m_Pages[i] = null;
		}
	}

	// Nothing to release: the task only holds pointers and views.
	~CParticleTask_BuildVBTexcoords()
	{
	}
};
// NOTE(review): presumably declares the ref-counted pointer type(s) for this class -- confirm against the macro definition.
HH_DECLARE_REFPTRCLASS(ParticleTask_BuildVBTexcoords);