fox-it · Horofic · Aug 16, 2024 · Jan 23, 2024 · Feb 14, 2024 · Feb 15, 2024
diff --git a/dissect/target/plugins/apps/texteditor/__init__.py b/dissect/target/plugins/apps/texteditor/__init__.py
diff --git a/dissect/target/plugins/apps/texteditor/texteditor.py b/dissect/target/plugins/apps/texteditor/texteditor.py
@@ -0,0 +1,16 @@
+from dissect.target.helpers.descriptor_extensions import UserRecordDescriptorExtension
+from dissect.target.helpers.record import create_extended_descriptor
+from dissect.target.plugin import NamespacePlugin
+
+# Record fields shared by the text editor tab records: a string ``content`` field and a ``path`` field.
+# The Windows Notepad plugin imports this list to build its own record descriptor.
+GENERIC_TAB_CONTENTS_RECORD_FIELDS = [
+    ("string", "content"),
+    ("path", "path"),
+]
+
+# Generic "texteditor/tab" record: the fields above, extended with the user record descriptor extension.
+TexteditorTabContentRecord = create_extended_descriptor([UserRecordDescriptorExtension])(
+    "texteditor/tab", GENERIC_TAB_CONTENTS_RECORD_FIELDS
+)
+
+
+class TexteditorPlugin(NamespacePlugin):
+    """Namespace plugin registered under ``texteditor``; editor-specific plugins subclass it."""
+
+    __namespace__ = "texteditor"
diff --git a/dissect/target/plugins/apps/texteditor/windowsnotepad.py b/dissect/target/plugins/apps/texteditor/windowsnotepad.py
@@ -0,0 +1,213 @@
+from __future__ import annotations
+
+import zlib
+from typing import Iterator
+
+from dissect.cstruct import cstruct
+
+from dissect.target.exceptions import UnsupportedPluginError
+from dissect.target.helpers.descriptor_extensions import UserRecordDescriptorExtension
+from dissect.target.helpers.fsutil import TargetPath
+from dissect.target.helpers.record import (
+    UnixUserRecord,
+    WindowsUserRecord,
+    create_extended_descriptor,
+)
+from dissect.target.plugin import export
+from dissect.target.plugins.apps.texteditor.texteditor import (
+    GENERIC_TAB_CONTENTS_RECORD_FIELDS,
+    TexteditorPlugin,
+)
+
+# Thanks to @Nordgaren, @daddycocoaman, @JustArion and @ogmini for their suggestions and feedback in the PR
+# thread. This really helped to figure out the last missing bits and pieces
+# required for recovering text from these files.
+
+# cstruct definitions used to parse a Notepad tab file: a header common to all files, a state-specific tab header
+# (saved or unsaved, selected by ``fileState``) and the data block(s) that hold the text as wide characters.
+# NOTE: the text below is handed to cstruct verbatim, including the C-style comments inside it.
+c_def = """
+struct header {
+    char        magic[2]; // NP
+    uint8       unk0;
+    uint8       fileState; // 0 if unsaved, 1 if saved
+}
+
+struct header_saved_tab {
+    uleb128     filePathLength;
+    wchar       filePath[filePathLength];
+    uleb128     fileSize;
+    uleb128     encoding;
+    uleb128     carriageReturnType;
+    uleb128     timestamp; // Windows Filetime format (not unix timestamp)
+    char        sha256[32];
+    char        unk[6];
+};
+
+struct header_unsaved_tab {
+    uint8       unk0;
+    uleb128     fileSize;
+    uleb128     fileSizeDuplicate;
+    uint8       unk1;
+    uint8       unk2;
+};
+
+struct data_block {
+    uleb128     offset;
+    uleb128     nDeleted;
+    uleb128     nAdded;
+    wchar       data[nAdded];
+};
+"""
+
+# Parser instance with the structures above loaded; used as ``c_windowstab.<struct name>(fh)``.
+c_windowstab = cstruct()
+c_windowstab.load(c_def)
+
+# Record type yielded by the Windows Notepad plugin: the generic tab fields plus the user record extension.
+TextEditorTabRecord = create_extended_descriptor([UserRecordDescriptorExtension])(
+    "texteditor/windowsnotepad/tab", GENERIC_TAB_CONTENTS_RECORD_FIELDS
+)
+
+
+def _calc_crc32(data: bytes) -> bytes:
+    """Return the CRC32 checksum of ``data`` encoded as four big-endian bytes."""
+    checksum = zlib.crc32(data)
+    return checksum.to_bytes(4, "big")
+
+
+class WindowsNotepadPlugin(TexteditorPlugin):
+    """Windows notepad tab content plugin.
+
+    Collects the ``TabState/*.bin`` files from every user home directory and reconstructs the text of each tab.
+    """
+
+    __namespace__ = "windowsnotepad"
+
+    GLOB = "AppData/Local/Packages/Microsoft.WindowsNotepad_*/LocalState/TabState/*.bin"
+
+    def __init__(self, target):
+        super().__init__(target)
+        # Pairs of (tab file, owning user), collected up front so check_compatible() can inspect them
+        self.users_tabs: list[tuple[TargetPath, UnixUserRecord | WindowsUserRecord]] = []
+
+        for user_details in self.target.user_details.all_with_home():
+            for tab_file in user_details.home_path.glob(self.GLOB):
+                # Files ending in .0.bin / .1.bin are skipped; only the remaining .bin files are parsed
+                if tab_file.name.endswith((".1.bin", ".0.bin")):
+                    continue
+
+                self.users_tabs.append((tab_file, user_details.user))
+
+    def check_compatible(self) -> None:
+        """Raise ``UnsupportedPluginError`` when no tab files were found for any user."""
+        if not self.users_tabs:
+            raise UnsupportedPluginError("No Windows Notepad temporary tab files found")
+
+    def _process_tab_file(self, file: TargetPath, user: UnixUserRecord | WindowsUserRecord) -> TextEditorTabRecord:
+        """Parse a binary tab file and reconstruct the contents.
+
+        A CRC32 mismatch is only logged as a warning; the recovered content is still returned.
+
+        Args:
+            file: The binary file on disk that needs to be parsed.
+            user: The user the tab file belongs to, attached to the resulting record.
+
+        Returns:
+            A TextEditorTabRecord containing information that is in the tab.
+        """
+        with file.open("rb") as fh:
+            # Header is the same for all types
+            header = c_windowstab.header(fh)
+
+            # File can be saved, or unsaved. Depending on the file state, different header fields are present
+            # Currently, no information in the header is used in the outputted records, only the contents of the tab
+            tab = (
+                c_windowstab.header_saved_tab(fh)
+                if header.fileState == 0x01  # 0x00 is unsaved, 0x01 is saved
+                else c_windowstab.header_unsaved_tab(fh)
+            )
+
+            # In the case that the file size is known up front, then this fileSize is set to a nonzero value
+            # This means that the data is stored in one block
+            if tab.fileSize != 0:
+                # So we only parse one block
+                data_entry = c_windowstab.data_block(fh)
+
+                # An extra byte is appended to the single block, not yet sure where this is defined and/or used for
+                extra_byte = fh.read(1)
+
+                # The CRC32 value is appended after the extra byte in big-endian
+                defined_crc32 = fh.read(4)
+
+                # The checksum covers the header without its first three bytes (the 2-byte magic and unk0),
+                # followed by the tab header and all data (including the extra byte)
+                actual_crc32 = _calc_crc32(header.dumps()[3:] + tab.dumps() + data_entry.dumps() + extra_byte)
+
+                if defined_crc32 != actual_crc32:
+                    self.target.log.warning(
+                        "CRC32 mismatch in single-block file: %s (expected=%s, actual=%s)",
+                        file.name,
+                        defined_crc32.hex(),
+                        actual_crc32.hex(),
+                    )
+
+                text = data_entry.data
+
+            else:
+                # Here, the fileSize is zeroed, meaning that the size is not known up front.
+                # Data may be stored in multiple, variable-length blocks. This happens, for example, when several
+                # additions and deletions of characters have been recorded and these changes have not been 'flushed'
+
+                # First, parse 4 unknown bytes. These likely
+                # hold some additional information about the tab (view options etc.)
+                unknown_bytes = fh.read(4)
+
+                # In this multi-block variant, the header itself has a CRC32 value in big-endian as well
+                defined_header_crc32 = fh.read(4)
+
+                # Calculate CRC32 of the header and check if it matches
+                actual_header_crc32 = _calc_crc32(header.dumps()[3:] + tab.dumps() + unknown_bytes)
+                if defined_header_crc32 != actual_header_crc32:
+                    self.target.log.warning(
+                        "CRC32 mismatch in header of multi-block file: %s expected=%s, actual=%s",
+                        file.name,
+                        defined_header_crc32.hex(),
+                        actual_header_crc32.hex(),
+                    )
+
+                # Since we don't know the size of the file up front, and offsets don't necessarily have to be in order,
+                # a list is used to easily insert text at offsets
+                text = []
+
+                while True:
+                    # Unfortunately, there is no way of determining how many blocks there are. So just try to parse
+                    # until we reach EOF, after which we stop.
+                    try:
+                        data_entry = c_windowstab.data_block(fh)
+                    except EOFError:
+                        break
+
+                    # Each block has a CRC32 value in big-endian appended to the block
+                    defined_crc32 = fh.read(4)
+
+                    # Either the nAdded is nonzero, or the nDeleted
+                    if data_entry.nAdded > 0:
+                        # Check the CRC32 checksum for this block
+                        actual_crc32 = _calc_crc32(data_entry.dumps())
+                        if defined_crc32 != actual_crc32:
+                            # The stored checksum is read separately from the block: data_block has no crc32 field
+                            self.target.log.warning(
+                                "CRC32 mismatch in multi-block file: %s expected=%s, actual=%s",
+                                file.name,
+                                defined_crc32.hex(),
+                                actual_crc32.hex(),
+                            )
+
+                        # Insert the whole run of added characters at the block offset with one slice assignment
+                        text[data_entry.offset : data_entry.offset] = data_entry.data
+
+                    elif data_entry.nDeleted > 0:
+                        # Create a new slice. Include everything up to the offset,
+                        # plus everything after the nDeleted following characters
+                        text = text[: data_entry.offset] + text[data_entry.offset + data_entry.nDeleted :]
+
+                # Join all the characters to reconstruct the original text
+                text = "".join(text)
+
+        return TextEditorTabRecord(content=text, path=file, _target=self.target, _user=user)
+
+    @export(record=TextEditorTabRecord)
+    def tabs(self) -> Iterator[TextEditorTabRecord]:
+        """Return contents from Windows 11 temporary Notepad tabs.
+
+        Yields TextEditorTabRecord with the following fields:
+            content (string): The contents of the tab.
+            path (path): The path the content originates from.
+        """
+        for file, user in self.users_tabs:
+            yield self._process_tab_file(file, user)
diff --git a/pyproject.toml b/pyproject.toml
@@ -26,7 +26,7 @@ classifiers = [
 ]
 dependencies = [
     "defusedxml",
-    "dissect.cstruct>=3.0.dev,<4.0.dev",
+    "dissect.cstruct>=3.14.dev4,<4.0.dev",
     "dissect.eventlog>=3.0.dev,<4.0.dev",
     "dissect.evidence>=3.0.dev,<4.0.dev",
     "dissect.hypervisor>=3.0.dev,<4.0.dev",

diff --git a/tests/_data/plugins/apps/texteditor/windowsnotepad/3d0cc86e-dfc9-4f16-b74a-918c2c24188c.bin b/tests/_data/plugins/apps/texteditor/windowsnotepad/3d0cc86e-dfc9-4f16-b74a-918c2c24188c.bin
diff --git a/tests/_data/plugins/apps/texteditor/windowsnotepad/3f915e17-cf6c-462b-9bd1-2f23314cb979.bin b/tests/_data/plugins/apps/texteditor/windowsnotepad/3f915e17-cf6c-462b-9bd1-2f23314cb979.bin
diff --git a/tests/_data/plugins/apps/texteditor/windowsnotepad/85167c9d-aac2-4469-ae44-db5dccf8f7f4.bin b/tests/_data/plugins/apps/texteditor/windowsnotepad/85167c9d-aac2-4469-ae44-db5dccf8f7f4.bin
diff --git a/tests/_data/plugins/apps/texteditor/windowsnotepad/appclosed_saved_and_deletions.bin b/tests/_data/plugins/apps/texteditor/windowsnotepad/appclosed_saved_and_deletions.bin
diff --git a/tests/_data/plugins/apps/texteditor/windowsnotepad/appclosed_unsaved.bin b/tests/_data/plugins/apps/texteditor/windowsnotepad/appclosed_unsaved.bin
diff --git a/tests/_data/plugins/apps/texteditor/windowsnotepad/ba291ccd-f1c3-4ca8-949c-c01f6633789d.bin b/tests/_data/plugins/apps/texteditor/windowsnotepad/ba291ccd-f1c3-4ca8-949c-c01f6633789d.bin
diff --git a/tests/_data/plugins/apps/texteditor/windowsnotepad/c515e86f-08b3-4d76-844a-cddfcd43fcbb.bin b/tests/_data/plugins/apps/texteditor/windowsnotepad/c515e86f-08b3-4d76-844a-cddfcd43fcbb.bin
diff --git a/tests/_data/plugins/apps/texteditor/windowsnotepad/cfe38135-9dca-4480-944f-d5ea0e1e589f.bin b/tests/_data/plugins/apps/texteditor/windowsnotepad/cfe38135-9dca-4480-944f-d5ea0e1e589f.bin
diff --git a/tests/_data/plugins/apps/texteditor/windowsnotepad/dae80df8-e1e5-4996-87fe-b453f63fcb19.bin b/tests/_data/plugins/apps/texteditor/windowsnotepad/dae80df8-e1e5-4996-87fe-b453f63fcb19.bin
diff --git a/tests/_data/plugins/apps/texteditor/windowsnotepad/e609218e-94f2-45fa-84e2-f29df2190b26.bin b/tests/_data/plugins/apps/texteditor/windowsnotepad/e609218e-94f2-45fa-84e2-f29df2190b26.bin
diff --git a/tests/_data/plugins/apps/texteditor/windowsnotepad/lots-of-deletions.bin b/tests/_data/plugins/apps/texteditor/windowsnotepad/lots-of-deletions.bin
diff --git a/tests/_data/plugins/apps/texteditor/windowsnotepad/saved.bin b/tests/_data/plugins/apps/texteditor/windowsnotepad/saved.bin
diff --git a/tests/_data/plugins/apps/texteditor/windowsnotepad/unsaved-with-deletions.bin b/tests/_data/plugins/apps/texteditor/windowsnotepad/unsaved-with-deletions.bin
diff --git a/tests/_data/plugins/apps/texteditor/windowsnotepad/unsaved.bin b/tests/_data/plugins/apps/texteditor/windowsnotepad/unsaved.bin
diff --git a/tests/_data/plugins/apps/texteditor/windowsnotepad/wrong-checksum.bin b/tests/_data/plugins/apps/texteditor/windowsnotepad/wrong-checksum.bin
diff --git a/tests/plugins/apps/texteditor/__init__.py b/tests/plugins/apps/texteditor/__init__.py
diff --git a/tests/plugins/apps/texteditor/test_texteditor.py b/tests/plugins/apps/texteditor/test_texteditor.py
@@ -0,0 +1,73 @@
+import os
+
+from dissect.target.plugins.apps.texteditor import windowsnotepad
+from tests._utils import absolute_path
+
+# Expected tab contents for the fixture files; the test function below maps each fixture file name to one of these.
+text1 = "This is an unsaved tab, UTF-8 encoded with Windows (CRLF). It's only 88 characters long."
+text2 = (
+    "Dissect は、インシデント対応のための優れたフレームワークです。 The Notepad window shows UTF-8 as the encoding. This text has 113 "
+    "characters."
+)
+text3 = "This is a very short text."
+text4 = "This is another short test. And we should be able to parse this."
+# text5 and text6 are repeated by the test (``* 5`` and ``* 1260``) to build the expected content
+text5 = "This is a test and the text is longer than 256 bytes. "
+text6 = "This is a test and the text is longer than 65536 bytes. "
+text7 = (
+    "This a text, which is nothing special. But I am going to modify it a bit. For example, "
+    "I have removed quote some stuff. Adding a word in the beginning now... "
+    "At this point, I've edited it quite a lot."
+)
+text8 = (
+    "Closing application now. It's saved but now I'm adding unsaved changes and closing "
+    "the application again. Dit a few deletions!"
+)
+loremipsum = """Lorem ipsum dolor sit amet. Eum error blanditiis eum pariatur delectus ut consequuntur officiis a excepturi dignissimos et doloribus quia 33 perspiciatis soluta nam perspiciatis dolor. Ut repudiandae quidem cum sint modi qui sint consequatur. Aut autem quidem eum enim consequatur qui voluptate consequatur non similique voluptate. A vitae modi vel sint provident ut galisum tenetur sit voluptatem amet. Est impedit perspiciatis est repudiandae voluptates ut fugit alias! Eum magni esse aut velit illum qui excepturi aperiam. Ex dolores asperiores ut debitis omnis qui consequuntur dolore. Est voluptatem mollitia et quibusdam unde ea accusamus fuga. Cum quis galisum et impedit sunt qui aliquam perspiciatis sed modi quidem qui nisi molestias. Aut temporibus architecto ut neque voluptatem et consequatur deleniti sed accusantium quibusdam et omnis dignissimos ad rerum ipsam et rerum quia. Ut nihil repellat et eaque molestias quo iusto ipsum At optio sint eos quidem earum?\r\rEx deleniti unde eum tenetur rerum ea dolore numquam? Eos aperiam officiis et neque explicabo et enim atque ut eaque omnis non illum eveniet est molestias itaque et ratione voluptatem. Ea deserunt nemo et quos tempora et nostrum aperiam sit necessitatibus illo sit culpa placeat. Vel tempore quibusdam ut velit voluptate aut odio facere non voluptas earum est odio galisum et voluptas harum. Et blanditiis sapiente et nostrum laborum aut voluptatem explicabo a quasi assumenda. Est voluptatem quia eum minima galisum quo totam excepturi aut facilis enim vel voluptate repudiandae sit distinctio laboriosam. Quo possimus molestiae et molestiae accusantium est voluptas omnis sed obcaecati natus. Non vitae asperiores qui nostrum enim id saepe fugiat et incidunt quasi.\r\rEos ipsa facilis aut excepturi voluptatem a omnis magni vel magni iste. Sed ipsum consequatur qui reprehenderit deleniti et soluta molestiae. 
Ut vero assumenda id dolor ipsum in deleniti voluptatem aut quis quisquam sed repudiandae temporibus ab quia inventore. Sed velit fugit vel facere cumque et delectus ullam sed eaque impedit. Est veritatis dignissimos aut doloribus dolorem vel pariatur repellendus sit nesciunt similique eum architecto quia. Ea expedita veritatis eum dolorem molestiae ut enim fugit aut beatae quibusdam. Aut voluptas natus in quidem deleniti aut animi iure est incidunt tenetur qui culpa maiores! Et nostrum quaerat qui consequatur consequatur aut aliquam atque aut praesentium rerum et consequuntur exercitationem. Non accusantium ipsa vel consectetur vitae ut magnam autem et natus rerum ut consectetur inventore est doloremque temporibus 33 dolores doloribus! Aut perferendis optio et nostrum repellendus et fugit itaque ut nisi neque sed sint quaerat. Aut placeat architecto et eius sapiente eum molestiae quam. Quo mollitia sapiente non Quis neque non tempora laudantium. Quo distinctio quos et molestias natus sit veritatis consequuntur aut repellendus neque a porro galisum cum numquam nesciunt et animi earum? Aut dolorum dolore non assumenda omnis et molestiae amet id sint vero est eligendi harum sit temporibus magnam aut ipsam quos.\r\r"""  # noqa: E501
+
+
+def test_texteditor_plugin(target_win, fs_win, tmp_path, target_win_users, caplog):
+    """Map every fixture tab file into John's TabState directory and verify the recovered text."""
+    expected_by_name = {
+        "c515e86f-08b3-4d76-844a-cddfcd43fcbb.bin": text1,
+        "85167c9d-aac2-4469-ae44-db5dccf8f7f4.bin": text2,
+        "dae80df8-e1e5-4996-87fe-b453f63fcb19.bin": text3,
+        "3f915e17-cf6c-462b-9bd1-2f23314cb979.bin": text4,
+        "ba291ccd-f1c3-4ca8-949c-c01f6633789d.bin": (text5 * 5),
+        "e609218e-94f2-45fa-84e2-f29df2190b26.bin": (text6 * 1260),
+        "3d0cc86e-dfc9-4f16-b74a-918c2c24188c.bin": loremipsum,
+        "wrong-checksum.bin": text4,  # only added to check for corrupt checksum, not validity
+        "cfe38135-9dca-4480-944f-d5ea0e1e589f.bin": (loremipsum * 37)[:-2],  # removed the two newlines in this file
+        "saved.bin": "Saved!",
+        "unsaved.bin": "Not saved at all",
+        "unsaved-with-deletions.bin": "Not saved aasdflasd",
+        "lots-of-deletions.bin": text7,
+        "appclosed_saved_and_deletions.bin": text8,
+        "appclosed_unsaved.bin": "Closing application now",
+    }
+
+    fixture_dir = absolute_path("_data/plugins/apps/texteditor/windowsnotepad/")
+
+    john = target_win_users.user_details.find(username="John")
+    tab_state_dir = john.home_path.joinpath(
+        "AppData/Local/Packages/Microsoft.WindowsNotepad_8wekyb3d8bbwe/LocalState/TabState"
+    )
+
+    fs_win.map_dir("Users\\John", tmp_path)
+
+    for name in expected_by_name:
+        # The first three characters of the target path are dropped before mapping (presumably the drive prefix)
+        mapped_path = str(tab_state_dir.joinpath(name))[3:]
+        fs_win.map_file(mapped_path, os.path.join(fixture_dir, name))
+
+    target_win.add_plugin(windowsnotepad.WindowsNotepadPlugin)
+
+    records = list(target_win.windowsnotepad.tabs())
+    expected_count = len(expected_by_name)
+
+    # Every mapped fixture must be present in the directory and must yield exactly one record
+    assert len(list(tab_state_dir.iterdir())) == expected_count
+    assert len(records) == expected_count
+
+    # The file with the corrupt checksum still returns content, but a CRC mismatch must show up in the log
+    assert "CRC32 mismatch in single-block file: wrong-checksum.bin (expected=deadbeef, actual=a48d30a6)" in caplog.text
+
+    # The recovered content in the records should match the original data, as well as the length
+    for record in records:
+        expected = expected_by_name[record.path.name]
+        assert record.content == expected
+        assert len(record.content) == len(expected)