Skip to content

Commit 9f3c0ec

Browse files
committed
Update all per-object/instance buffers all at once
FIXES=[433807402]
1 parent 6d0a47b commit 9f3c0ec

File tree

5 files changed

+137
-127
lines changed

5 files changed

+137
-127
lines changed

filament/src/components/RenderableManager.cpp

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -434,6 +434,18 @@ void RenderableManager::BuilderDetails::processBoneIndicesAndWights(Engine& engi
434434
mBoneIndicesAndWeightsCount = pairsCount; // only part of mBoneIndicesAndWeights is used for real data
435435
}
436436

437+
// Sets the number of instances for the renderable being built.
// The requested count is clamped to [1, 32767] and stored in the builder; returns *this so that
// calls can be chained.
RenderableManager::Builder& RenderableManager::Builder::instances(size_t const instanceCount) noexcept {
438+
// Clamp in size_t, like the InstanceBuffer overload does. Casting to unsigned int before
// clamping would wrap large values (e.g. 2^32 becomes 0) and clamp them to 1 instead of 32767.
mImpl->mInstanceCount = clamp(instanceCount, (size_t)1, (size_t)32767);
439+
return *this;
440+
}
441+
442+
// Overload of instances() that also records an InstanceBuffer for the renderable being built.
//   instanceCount   requested number of instances, clamped to [1, CONFIG_MAX_INSTANCES]
//   instanceBuffer  buffer to associate with the renderable; stored after downcast()
// Returns *this so that calls can be chained.
RenderableManager::Builder& RenderableManager::Builder::instances(
443+
size_t const instanceCount, InstanceBuffer* instanceBuffer) noexcept {
444+
// the clamp is done on size_t values, so no truncation happens before the range check
mImpl->mInstanceCount = clamp(instanceCount, (size_t)1, CONFIG_MAX_INSTANCES);
445+
mImpl->mInstanceBuffer = downcast(instanceBuffer);
446+
return *this;
447+
}
448+
437449
RenderableManager::Builder::Result RenderableManager::Builder::build(Engine& engine, Entity const entity) {
438450
bool isEmpty = true;
439451

@@ -519,18 +531,6 @@ RenderableManager::Builder::Result RenderableManager::Builder::build(Engine& eng
519531
return Success;
520532
}
521533

522-
RenderableManager::Builder& RenderableManager::Builder::instances(size_t const instanceCount) noexcept {
523-
mImpl->mInstanceCount = clamp((unsigned int)instanceCount, 1u, 32767u);
524-
return *this;
525-
}
526-
527-
RenderableManager::Builder& RenderableManager::Builder::instances(
528-
size_t const instanceCount, InstanceBuffer* instanceBuffer) noexcept {
529-
mImpl->mInstanceCount = clamp(instanceCount, (size_t)1, CONFIG_MAX_INSTANCES);
530-
mImpl->mInstanceBuffer = downcast(instanceBuffer);
531-
return *this;
532-
}
533-
534534
// ------------------------------------------------------------------------------------------------
535535

536536
FRenderableManager::FRenderableManager(FEngine& engine) noexcept : mEngine(engine) {

filament/src/details/InstanceBuffer.cpp

Lines changed: 19 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@
2929
#include <backend/DriverEnums.h>
3030
#include <backend/Handle.h>
3131

32+
#include <utils/compiler.h>
33+
#include <utils/debug.h>
3234
#include <utils/Panic.h>
3335
#include <utils/StaticString.h>
3436

@@ -87,7 +89,7 @@ InstanceBuffer* InstanceBuffer::Builder::build(Engine& engine) const {
8789

8890
// ------------------------------------------------------------------------------------------------
8991

90-
FInstanceBuffer::FInstanceBuffer(FEngine& engine, const Builder& builder)
92+
FInstanceBuffer::FInstanceBuffer(FEngine&, const Builder& builder)
9193
: mName(builder.getName()) {
9294
mInstanceCount = builder->mInstanceCount;
9395

@@ -98,22 +100,11 @@ FInstanceBuffer::FInstanceBuffer(FEngine& engine, const Builder& builder)
98100
memcpy(mLocalTransforms.data(), builder->mLocalTransforms,
99101
sizeof(math::mat4f) * mInstanceCount);
100102
}
101-
102-
// Allocate our instance buffer. We always allocate a size to match
103-
// PerRenderableUib, regardless of the number of instances. This is because the buffer
104-
// will get bound to the PER_RENDERABLE UBO, and we can't bind a buffer smaller than the
105-
// full size of the UBO.
106-
DriverApi& driver = engine.getDriverApi();
107-
mHandle = driver.createBufferObject(sizeof(PerRenderableUib),
108-
BufferObjectBinding::UNIFORM, BufferUsage::DYNAMIC);
109-
if (auto name = mName; !name.empty()) {
110-
driver.setDebugTag(mHandle.getId(), std::move(name));
111-
}
112103
}
113104

114-
void FInstanceBuffer::terminate(FEngine& engine) {
115-
DriverApi& driver = engine.getDriverApi();
116-
driver.destroyBufferObject(std::move(mHandle));
105+
void FInstanceBuffer::terminate(FEngine&) {
106+
mHandle.clear();
107+
mOffset = 0;
117108
}
118109

119110
FInstanceBuffer::~FInstanceBuffer() noexcept = default;
@@ -127,27 +118,23 @@ void FInstanceBuffer::setLocalTransforms(
127118
memcpy(mLocalTransforms.data() + offset, localTransforms, sizeof(math::mat4f) * count);
128119
}
129120

130-
void FInstanceBuffer::prepare(DriverApi& driver, math::mat4f const& rootTransform,
131-
const PerRenderableData& ubo) {
121+
void FInstanceBuffer::prepare(
122+
BufferObjectHandle ubh,
123+
PerRenderableData* const UTILS_RESTRICT buffer, uint32_t const offset, uint32_t const count,
124+
math::mat4f const& rootTransform, PerRenderableData const& ubo) {
132125

133-
// TODO: allocate this staging buffer from a pool.
134-
constexpr uint32_t stagingBufferSize = sizeof(PerRenderableUib);
135-
PerRenderableData* stagingBuffer = static_cast<PerRenderableData*>(malloc(stagingBufferSize));
136-
// TODO: consider using JobSystem to parallelize this.
137-
for (size_t i = 0, c = mInstanceCount; i < c; i++) {
138-
stagingBuffer[i] = ubo;
139-
math::mat4f const model = rootTransform * mLocalTransforms[i];
140-
stagingBuffer[i].worldFromModelMatrix = model;
126+
// there is a precondition check for this, so this assert really should never trigger
127+
assert_invariant(count <= mInstanceCount);
141128

129+
for (size_t i = 0, c = count; i < c; i++) {
130+
math::mat4f const model = rootTransform * mLocalTransforms[i];
142131
math::mat3f const m = math::mat3f::getTransformForNormals(model.upperLeft());
143-
stagingBuffer[i].worldFromModelNormalMatrix = math::prescaleForNormals(m);
132+
buffer[offset + i] = ubo;
133+
buffer[offset + i].worldFromModelMatrix = model;
134+
buffer[offset + i].worldFromModelNormalMatrix = math::prescaleForNormals(m);
144135
}
145-
driver.updateBufferObject(mHandle, {
146-
stagingBuffer, stagingBufferSize,
147-
+[](void* buffer, size_t, void*) {
148-
free(buffer);
149-
}
150-
}, 0);
136+
mHandle = ubh;
137+
mOffset = offset * sizeof(PerRenderableData);
151138
}
152139

153140
} // namespace filament

filament/src/details/InstanceBuffer.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121

2222
#include <filament/InstanceBuffer.h>
2323

24-
#include <backend/DriverApiForward.h>
2524
#include <backend/Handle.h>
2625

2726
#include <math/mat4.h>
@@ -50,7 +49,10 @@ class FInstanceBuffer : public InstanceBuffer {
5049

5150
void setLocalTransforms(math::mat4f const* localTransforms, size_t count, size_t offset);
5251

53-
void prepare(backend::DriverApi& driver, math::mat4f const& rootTransform, const PerRenderableData& ubo);
52+
void prepare(
53+
backend::BufferObjectHandle ubh,
54+
PerRenderableData* buffer, uint32_t offset, uint32_t count,
55+
math::mat4f const& rootTransform, PerRenderableData const& ubo);
5456

5557
utils::CString const& getName() const noexcept { return mName; }
5658

filament/src/details/View.cpp

Lines changed: 99 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -723,15 +723,15 @@ void FView::prepare(FEngine& engine, DriverApi& driver, RootArenaScope& rootAren
723723
VISIBLE_RENDERABLE | VISIBLE_DIR_SHADOW_RENDERABLE,
724724
VISIBLE_RENDERABLE);
725725

726-
auto beginDirCastersOnly = partition(beginDirCasters, renderableData.end(),
726+
auto const beginDirCastersOnly = partition(beginDirCasters, renderableData.end(),
727727
VISIBLE_RENDERABLE | VISIBLE_DIR_SHADOW_RENDERABLE,
728728
VISIBLE_RENDERABLE | VISIBLE_DIR_SHADOW_RENDERABLE);
729729

730-
auto endDirCastersOnly = partition(beginDirCastersOnly, renderableData.end(),
730+
auto const endDirCastersOnly = partition(beginDirCastersOnly, renderableData.end(),
731731
VISIBLE_RENDERABLE | VISIBLE_DIR_SHADOW_RENDERABLE,
732732
VISIBLE_DIR_SHADOW_RENDERABLE);
733733

734-
auto endPotentialSpotCastersOnly = partition(endDirCastersOnly, renderableData.end(),
734+
auto const endPotentialSpotCastersOnly = partition(endDirCastersOnly, renderableData.end(),
735735
VISIBLE_DYN_SHADOW_RENDERABLE,
736736
VISIBLE_DYN_SHADOW_RENDERABLE);
737737

@@ -759,21 +759,8 @@ void FView::prepare(FEngine& engine, DriverApi& driver, RootArenaScope& rootAren
759759
scene->prepareVisibleRenderables(merged);
760760

761761
// update those UBOs
762-
const size_t size = merged.size() * sizeof(PerRenderableData);
763-
if (size) {
764-
if (mRenderableUBOSize < size) {
765-
// allocate 1/3 extra, with a minimum of 16 objects
766-
const size_t count = std::max(size_t(16u), (4u * merged.size() + 2u) / 3u);
767-
mRenderableUBOSize = uint32_t(count * sizeof(PerRenderableData));
768-
driver.destroyBufferObject(mRenderableUbh);
769-
mRenderableUbh = driver.createBufferObject(
770-
mRenderableUBOSize + sizeof(PerRenderableUib),
771-
BufferObjectBinding::UNIFORM, BufferUsage::DYNAMIC);
772-
} else {
773-
// TODO: should we shrink the underlying UBO at some point?
774-
}
775-
assert_invariant(mRenderableUbh);
776-
updateUBOs(driver, renderableData, merged, mRenderableUbh);
762+
if (!merged.empty()) {
763+
updateUBOs(driver, renderableData, merged);
777764

778765
mCommonRenderableDescriptorSet.setBuffer(
779766
engine.getPerRenderableDescriptorSetLayout(),
@@ -893,65 +880,6 @@ void FView::prepare(FEngine& engine, DriverApi& driver, RootArenaScope& rootAren
893880
colorPassDescriptorSet.prepareMaterialGlobals(mMaterialGlobals);
894881
}
895882

896-
void FView::updateUBOs(
897-
FEngine::DriverApi& driver,
898-
FScene::RenderableSoa& renderableData,
899-
utils::Range<uint32_t> visibleRenderables,
900-
Handle<HwBufferObject> renderableUbh) noexcept {
901-
FILAMENT_TRACING_CALL(FILAMENT_TRACING_CATEGORY_FILAMENT);
902-
903-
// don't allocate more than 16 KiB directly into the render stream
904-
static constexpr size_t MAX_STREAM_ALLOCATION_COUNT = 64; // 16 KiB
905-
const size_t count = visibleRenderables.size();
906-
PerRenderableData* buffer = [&]{
907-
if (count >= MAX_STREAM_ALLOCATION_COUNT) {
908-
// use the heap allocator
909-
auto& bufferPoolAllocator = mSharedState->mBufferPoolAllocator;
910-
return static_cast<PerRenderableData*>(bufferPoolAllocator.get(count * sizeof(PerRenderableData)));
911-
}
912-
// allocate space into the command stream directly
913-
return driver.allocatePod<PerRenderableData>(count);
914-
}();
915-
916-
PerRenderableData const* const uboData = renderableData.data<FScene::UBO>();
917-
mat4f const* const worldTransformData = renderableData.data<FScene::WORLD_TRANSFORM>();
918-
919-
// prepare each InstanceBuffer.
920-
FRenderableManager::InstancesInfo const* instancesData = renderableData.data<FScene::INSTANCES>();
921-
for (uint32_t const i : visibleRenderables) {
922-
auto& instancesInfo = instancesData[i];
923-
if (UTILS_UNLIKELY(instancesInfo.buffer)) {
924-
instancesInfo.buffer->prepare(driver, worldTransformData[i], uboData[i]);
925-
}
926-
}
927-
928-
// copy our data into the UBO for each visible renderable
929-
for (uint32_t const i : visibleRenderables) {
930-
buffer[i] = uboData[i];
931-
}
932-
933-
// We capture state shared between Scene and the update buffer callback, because the Scene could
934-
// be destroyed before the callback executes.
935-
std::weak_ptr<SharedState>* const weakShared =
936-
new (std::nothrow) std::weak_ptr(mSharedState);
937-
938-
// update the UBO
939-
driver.resetBufferObject(renderableUbh);
940-
driver.updateBufferObjectUnsynchronized(renderableUbh, {
941-
buffer, count * sizeof(PerRenderableData),
942-
+[](void* p, size_t const s, void* user) {
943-
std::weak_ptr<SharedState> const* const weakShared =
944-
static_cast<std::weak_ptr<SharedState>*>(user);
945-
if (s >= MAX_STREAM_ALLOCATION_COUNT * sizeof(PerRenderableData)) {
946-
if (auto state = weakShared->lock()) {
947-
state->mBufferPoolAllocator.put(p);
948-
}
949-
}
950-
delete weakShared;
951-
}, weakShared
952-
}, 0);
953-
}
954-
955883
void FView::computeVisibilityMasks(
956884
uint8_t const visibleLayers,
957885
uint8_t const* UTILS_RESTRICT layers,
@@ -983,6 +911,100 @@ void FView::computeVisibilityMasks(
983911
}
984912
}
985913

914+
// Fills one CPU staging buffer with the PerRenderableData of every visible renderable, followed by
// the per-instance entries of the renderables that have an InstanceBuffer, grows mRenderableUbh
// when it holds fewer elements than needed, and uploads the whole buffer with a single
// updateBufferObjectUnsynchronized() call.
//   driver              driver API used to (re)create the buffer object and to upload the data
//   renderableData      SoA providing the INSTANCES, UBO and WORLD_TRANSFORM arrays read here
//   visibleRenderables  indices into renderableData to process
void FView::updateUBOs(
915+
FEngine::DriverApi& driver,
916+
FScene::RenderableSoa& renderableData,
917+
utils::Range<uint32_t> visibleRenderables) noexcept {
918+
FILAMENT_TRACING_CALL(FILAMENT_TRACING_CATEGORY_FILAMENT);
919+
920+
FRenderableManager::InstancesInfo const* instancesData = renderableData.data<FScene::INSTANCES>();
921+
PerRenderableData const* const uboData = renderableData.data<FScene::UBO>();
922+
mat4f const* const worldTransformData = renderableData.data<FScene::WORLD_TRANSFORM>();
923+
924+
// regular renderables count
925+
size_t const rcount = visibleRenderables.size();
926+
927+
// instanced renderables count
// (sum of the instance counts of all visible renderables that have an InstanceBuffer)
928+
size_t icount = 0;
929+
for (uint32_t const i : visibleRenderables) {
930+
auto& instancesInfo = instancesData[i];
931+
if (instancesInfo.buffer) {
932+
assert_invariant(instancesInfo.count == instancesInfo.buffer->getInstanceCount());
933+
icount += instancesInfo.count;
934+
}
935+
}
936+
937+
// total count of PerRenderableData slots we need
938+
size_t const tcount = rcount + icount;
939+
940+
// resize the UBO accordingly
941+
if (mRenderableUBOElementCount < tcount) {
942+
// allocate 1/3 extra, with a minimum of 16 objects
943+
const size_t count = std::max(size_t(16u), (4u * tcount + 2u) / 3u);
944+
mRenderableUBOElementCount = count;
945+
driver.destroyBufferObject(mRenderableUbh);
946+
mRenderableUbh = driver.createBufferObject(
947+
// NOTE(review): the extra sizeof(PerRenderableUib) is presumably there so that a full
// PerRenderableUib-sized range can be bound starting at any element -- TODO confirm
count * sizeof(PerRenderableData) + sizeof(PerRenderableUib),
948+
BufferObjectBinding::UNIFORM, BufferUsage::DYNAMIC);
949+
} else {
950+
// TODO: should we shrink the underlying UBO at some point?
951+
}
952+
assert_invariant(mRenderableUbh);
953+
954+
955+
// Allocate a staging CPU buffer:
956+
// Don't allocate more than 16 KiB directly into the render stream
957+
static constexpr size_t MAX_STREAM_ALLOCATION_COUNT = 64; // 16 KiB
958+
PerRenderableData* buffer = [&]{
959+
if (tcount >= MAX_STREAM_ALLOCATION_COUNT) {
960+
// use the heap allocator
961+
auto& bufferPoolAllocator = mSharedState->mBufferPoolAllocator;
962+
return static_cast<PerRenderableData*>(bufferPoolAllocator.get(tcount * sizeof(PerRenderableData)));
963+
}
964+
// allocate space into the command stream directly
965+
return driver.allocatePod<PerRenderableData>(tcount);
966+
}();
967+
968+
969+
// TODO: consider using JobSystem to parallelize this.
970+
// instance entries are placed after the rcount per-renderable slots; j is the next free slot
uint32_t j = rcount;
971+
for (uint32_t const i: visibleRenderables) {
972+
// even the instanced ones are copied here because we need to maintain the offsets
973+
// into the buffer currently (we could skip them because they won't be used, but
974+
// for now it's more trouble than it's worth)
975+
// NOTE(review): the slot is indexed by the renderable index i itself, which presumes that
// visibleRenderables starts at 0 -- confirm with the caller
buffer[i] = uboData[i];
976+
977+
auto& instancesInfo = instancesData[i];
978+
if (instancesInfo.buffer) {
979+
// writes instancesInfo.count entries starting at slot j of the staging buffer
instancesInfo.buffer->prepare(
980+
mRenderableUbh,
981+
buffer, j, instancesInfo.count,
982+
worldTransformData[i], uboData[i]);
983+
j += instancesInfo.count;
984+
}
985+
}
986+
987+
// We capture state shared between Scene and the update buffer callback, because the Scene could
988+
// be destroyed before the callback executes.
989+
// NOTE(review): new(std::nothrow) can yield nullptr and the callback below dereferences the
// pointer unconditionally -- confirm this is acceptable
std::weak_ptr<SharedState>* const weakShared = new(std::nothrow) std::weak_ptr(mSharedState);
990+
991+
// update the UBO
992+
driver.resetBufferObject(mRenderableUbh);
993+
driver.updateBufferObjectUnsynchronized(mRenderableUbh, {
994+
buffer, tcount * sizeof(PerRenderableData),
995+
+[](void* p, size_t const s, void* user) {
996+
std::weak_ptr<SharedState> const* const weakShared =
997+
static_cast<std::weak_ptr<SharedState>*>(user);
998+
// mirrors the allocation choice above (pool vs. command stream), assuming s is the byte
// size given to the descriptor; only pool allocations are handed back
if (s >= MAX_STREAM_ALLOCATION_COUNT * sizeof(PerRenderableData)) {
999+
if (auto state = weakShared->lock()) {
1000+
state->mBufferPoolAllocator.put(p);
1001+
}
1002+
}
1003+
delete weakShared;
1004+
}, weakShared
1005+
}, 0);
1006+
}
1007+
9861008
UTILS_NOINLINE
9871009
/* static */ FScene::RenderableSoa::iterator FView::partition(
9881010
FScene::RenderableSoa::iterator const begin,

filament/src/details/View.h

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -519,9 +519,8 @@ class FView : public View {
519519
Frustum const& frustum, FScene::RenderableSoa& renderableData) const noexcept;
520520

521521
void updateUBOs(backend::DriverApi& driver,
522-
FScene::RenderableSoa& renderableData,
523-
utils::Range<uint32_t> visibleRenderables,
524-
backend::Handle<backend::HwBufferObject> renderableUbh) noexcept;
522+
FScene::RenderableSoa& renderableData,
523+
utils::Range<uint32_t> visibleRenderables) noexcept;
525524

526525
static void prepareVisibleLights(FLightManager const& lcm,
527526
utils::Slice<float> scratch,
@@ -613,7 +612,7 @@ class FView : public View {
613612
Range mVisibleRenderables;
614613
Range mVisibleDirectionalShadowCasters;
615614
Range mSpotLightShadowCasters;
616-
uint32_t mRenderableUBOSize = 0;
615+
uint32_t mRenderableUBOElementCount = 0;
617616
mutable bool mHasDirectionalLighting = false;
618617
mutable bool mHasDynamicLighting = false;
619618
mutable bool mHasShadowing = false;

0 commit comments

Comments
 (0)