@@ -159,6 +159,8 @@ Froxelizer::Froxelizer(FEngine& engine)
159
159
160
160
size_t const froxelBufferByteCount = getFroxelBufferByteCount (driverApi);
161
161
mFroxelBufferEntryCount = froxelBufferByteCount / sizeof (FroxelEntry);
162
+ mFroxelBufferEntryCount &= ~0xF ; // make sure it's a multiple of 16 (helps vectorizing)
163
+ assert_invariant (mFroxelBufferEntryCount >= 16 ); // that's also needed elsewhere
162
164
163
165
size_t const froxelRecordBufferByteCount = getFroxelRecordBufferByteCount (driverApi);
164
166
mFroxelRecordBufferEntryCount = froxelRecordBufferByteCount / sizeof (uint8_t );
@@ -710,11 +712,13 @@ void Froxelizer::froxelizeAssignRecordsCompress() noexcept {
710
712
// this gets very well vectorized...
711
713
712
714
Slice records (mLightRecords );
713
- for (size_t j = 0 , jc = getFroxelBufferEntryCount (); j < jc; j++) {
715
+ for (size_t j = 0 , jc = getFroxelBufferEntryCount () ; j < jc; j++) {
716
+ using container_type = LightRecord::bitset::container_type;
717
+ constexpr size_t r = sizeof (container_type) / sizeof (LightGroupType);
718
+ UTILS_UNROLL
714
719
for (size_t i = 0 ; i < LightRecord::bitset::WORLD_COUNT; i++) {
715
- using container_type = LightRecord::bitset::container_type;
716
- constexpr size_t r = sizeof (container_type) / sizeof (LightGroupType);
717
720
container_type b = froxelThreadData[i * r][j];
721
+ UTILS_UNROLL
718
722
for (size_t k = 0 ; k < r; k++) {
719
723
b |= (container_type (froxelThreadData[i * r + k][j]) << (LIGHT_PER_GROUP * k));
720
724
}
@@ -733,19 +737,21 @@ void Froxelizer::froxelizeAssignRecordsCompress() noexcept {
733
737
const size_t froxelCountX = mFroxelCountX ;
734
738
RecordBufferType* const UTILS_RESTRICT froxelRecords = mRecordBufferUser .data ();
735
739
736
- // initialize the first record with all lights in the scene -- this will be used only if
740
+ // Initialize the first record with all lights in the scene -- this will be used only if
737
741
// we run out of record space.
738
- const uint8_t allLightsCount = uint8_t (std::min (size_t (255 ), allLights.count ()));
742
+
743
+ // Our light count cannot be larger than 255 because it's stored in a uint8_t. This should
744
+ // be guaranteed by CONFIG_MAX_LIGHT_COUNT.
745
+ assert_invariant (allLights.count () <= std::numeric_limits<uint8_t >::max ());
746
+
747
+ const uint8_t allLightsCount = allLights.count ();
739
748
offset += allLightsCount;
740
- allLights.forEachSetBit ([point = froxelRecords, froxelRecords](size_t l) mutable {
749
+ allLights.forEachSetBit ([p = froxelRecords](size_t l) mutable {
741
750
// make sure to keep this code branch-less
742
751
const size_t word = l / LIGHT_PER_GROUP;
743
752
const size_t bit = l % LIGHT_PER_GROUP;
744
753
l = (bit * GROUP_COUNT) | (word % GROUP_COUNT);
745
- *point = RecordBufferType (l);
746
- // we need to "cancel" the write operation if we have more than 255 spot or point lights
747
- // (this is a limitation of the data type used to store the light counts per froxel)
748
- point += (point - froxelRecords < 255 ) ? 1 : 0 ;
754
+ *p++ = RecordBufferType (l);
749
755
});
750
756
751
757
// how many froxel record entries were reused (for debugging)
@@ -759,8 +765,10 @@ void Froxelizer::froxelizeAssignRecordsCompress() noexcept {
759
765
}
760
766
761
767
// We have a limitation of 255 spot + 255 point lights per froxel.
768
+ assert_invariant (b.lights .count () <= std::numeric_limits<uint8_t >::max ());
769
+
762
770
// note: initializer list for union cannot have more than one element
763
- FroxelEntry entry{ offset, uint8_t (std::min ( size_t ( 255 ), b.lights .count () )) };
771
+ FroxelEntry entry{ offset, uint8_t (b.lights .count ()) };
764
772
const size_t lightCount = entry.count ();
765
773
766
774
if (UTILS_UNLIKELY (offset + lightCount >= mFroxelRecordBufferEntryCount )) {
@@ -778,15 +786,12 @@ void Froxelizer::froxelizeAssignRecordsCompress() noexcept {
778
786
779
787
// iterate the bitfield
780
788
auto * const beginPoint = froxelRecords + offset;
781
- b.lights .forEachSetBit ([point = beginPoint, beginPoint](size_t l) mutable {
789
+ b.lights .forEachSetBit ([p = beginPoint](size_t l) mutable {
782
790
// make sure to keep this code branch-less
783
791
const size_t word = l / LIGHT_PER_GROUP;
784
792
const size_t bit = l % LIGHT_PER_GROUP;
785
793
l = (bit * GROUP_COUNT) | (word % GROUP_COUNT);
786
- *point = RecordBufferType (l);
787
- // we need to "cancel" the write operation if we have more than 255 spot or point lights
788
- // (this is a limitation of the data type used to store the light counts per froxel)
789
- point += (point - beginPoint < 255 ) ? 1 : 0 ;
794
+ *p++ = RecordBufferType (l);
790
795
});
791
796
792
797
offset += lightCount;
0 commit comments