-
Notifications
You must be signed in to change notification settings - Fork 9k
Improve sixel performance #19639
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Improve sixel performance #19639
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1017,6 +1017,7 @@ minkernel | |
| MINMAXINFO | ||
| minwin | ||
| minwindef | ||
| misprediction | ||
| MMBB | ||
| mmcc | ||
| MMCPL | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -729,39 +729,100 @@ void SixelParser::_decreaseFilledBackgroundHeight(const int decreasedHeight) noe | |
| } | ||
| } | ||
|
|
||
| #pragma warning(push) | ||
| #pragma warning(disable : 26429) // Symbol 'imageBufferPtr' is never tested for nullness, it can be marked as not_null (f.23). | ||
| #pragma warning(disable : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1). | ||
| #pragma warning(disable : 26490) // Don't use reinterpret_cast (type.1). | ||
|
|
||
| void SixelParser::_writeToImageBuffer(int sixelValue, int repeatCount) | ||
| { | ||
| // On terminals that support the raster attributes command (which sets the | ||
| // background size), the background is only drawn when the first sixel value | ||
| // is received. So if we haven't filled it yet, we need to do so now. | ||
| _fillImageBackground(); | ||
|
|
||
| repeatCount = std::min(repeatCount, _imageMaxWidth - _imageCursor.x); | ||
| if (repeatCount <= 0) | ||
| { | ||
| return; | ||
| } | ||
|
|
||
| if (sixelValue == 0) | ||
| { | ||
| _imageCursor.x += repeatCount; | ||
| return; | ||
| } | ||
|
|
||
| // This allows us to unsafely cast _imageBuffer to uint16_t | ||
| // and benefit from compiler/STL optimizations. | ||
| static_assert(sizeof(IndexedPixel) == sizeof(int16_t)); | ||
| static_assert(alignof(IndexedPixel) == alignof(int16_t)); | ||
|
|
||
| // Then we need to render the 6 vertical pixels that are represented by the | ||
| // bits in the sixel value. Although note that each of these sixel pixels | ||
| // may cover more than one device pixel, depending on the aspect ratio. | ||
| const auto targetOffset = _imageCursor.y * _imageMaxWidth + _imageCursor.x; | ||
| auto imageBufferPtr = std::next(_imageBuffer.data(), targetOffset); | ||
| repeatCount = std::min(repeatCount, _imageMaxWidth - _imageCursor.x); | ||
| for (auto i = 0; i < 6; i++) | ||
| const auto foreground = std::bit_cast<int16_t>(_foregroundPixel); | ||
| auto imageBufferPtr = reinterpret_cast<int16_t*>(_imageBuffer.data() + targetOffset); | ||
|
|
||
| // A aspect ratio of 1:1 is the most common and worth optimizing. | ||
| if (_pixelAspectRatio == 1) | ||
| { | ||
| if (sixelValue & 1) | ||
| do | ||
| { | ||
| auto repeatAspectRatio = _pixelAspectRatio; | ||
| do | ||
| // This gets unrolled by MSVC. It's written this way to use CMOV instructions. | ||
| // Modern CPUs have fat caches and deep pipelines. It's better to do pointless reads | ||
| // from memory than causing branch misprediction, as sixelValue is highly random. | ||
| for (int i = 0; i < 6; i++) | ||
| { | ||
| std::fill_n(imageBufferPtr, repeatCount, _foregroundPixel); | ||
| std::advance(imageBufferPtr, _imageMaxWidth); | ||
| } while (--repeatAspectRatio > 0); | ||
| } | ||
| else | ||
| const auto test = sixelValue & (1 << i); | ||
| // Possibly pointless read from memory, but... | ||
| const auto before = imageBufferPtr[i * _imageMaxWidth]; | ||
| // ...it allows the compiler to turn this into a CMOV (= no branch misprediction). | ||
| const auto after = test ? foreground : before; | ||
| imageBufferPtr[i * _imageMaxWidth] = after; | ||
| } | ||
|
Comment on lines
+773
to
+784
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I added some comments, but the idea here is that |
||
|
|
||
| _imageCursor.x += 1; | ||
| imageBufferPtr += 1; | ||
| repeatCount -= 1; | ||
| } while (repeatCount > 0); | ||
| } | ||
| else | ||
| { | ||
| for (auto i = 0; i < 6; i++) | ||
| { | ||
| std::advance(imageBufferPtr, _imageMaxWidth * _pixelAspectRatio); | ||
| if (sixelValue & 1) | ||
| { | ||
| auto repeatAspectRatio = _pixelAspectRatio; | ||
| do | ||
| { | ||
| // If this used std::fill_n or just a primitive loop, MSVC would compile | ||
| // it to a `rep stosw` instruction, which has a high startup cost. | ||
| // This is not ideal when our repeatCount is almost always small. | ||
| // The way this does ptr++ and len-- is also ideal for optimization. | ||
| auto ptr = imageBufferPtr; | ||
| auto remaining = repeatCount; | ||
| do | ||
| { | ||
| __iso_volatile_store16(ptr++, foreground); | ||
| } while (--remaining != 0); | ||
|
Comment on lines
+800
to
+809
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To work around |
||
|
|
||
| imageBufferPtr += _imageMaxWidth; | ||
| } while (--repeatAspectRatio > 0); | ||
| } | ||
| else | ||
| { | ||
| std::advance(imageBufferPtr, _imageMaxWidth * _pixelAspectRatio); | ||
| } | ||
| sixelValue >>= 1; | ||
| } | ||
| sixelValue >>= 1; | ||
| _imageCursor.x += repeatCount; | ||
| } | ||
| _imageCursor.x += repeatCount; | ||
| } | ||
|
|
||
| #pragma warning(pop) | ||
|
|
||
| void SixelParser::_eraseImageBufferRows(const int rowCount, const til::CoordType rowOffset) noexcept | ||
| { | ||
| const auto pixelCount = rowCount * _cellSize.height; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Initially, I did this
int16_tcast, becausestd::fillcontains optimizations for primitive types (those that can be memset'd). To my dismay, I found out today that they added this check for "are all bits zero? if so, use memset": microsoft/STL@a27d894