Skip to content
Merged
Show file tree
Hide file tree
Changes from 26 commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
c528241
Initial commit
joost-j Jan 23, 2024
3594ff0
Removed unused 'seek_size' function
joost-j Feb 14, 2024
b1bcd69
Refactored the code to work with new LEB128 structure, added some mor…
joost-j Feb 15, 2024
c634987
Added more comments
joost-j Feb 15, 2024
d3d35a1
Refactor c_def to include parsing of both variants
joost-j Feb 19, 2024
cef81d0
Bump dissect.cstruct version to >=4.0.dev for clarity
joost-j Feb 19, 2024
7934f3e
Apply suggestions from code review
joost-j Feb 26, 2024
e6ea019
Removed duplicate brackets and refactor assertion into warning log
joost-j Feb 26, 2024
12fdd4a
Change variable names to fsize1 and fsize2, plus some linting
joost-j Feb 26, 2024
39a34a7
Refactored to work with LEB128 backport
joost-j Mar 4, 2024
8566028
Process feedback
joost-j Mar 4, 2024
56a26fa
Set cstruct dependency to next release
joost-j Mar 4, 2024
b18e975
Restore original shimcache.py file
joost-j Mar 4, 2024
1a1d80d
Move TextEditorTabRecord definition
joost-j Mar 25, 2024
b00bdc3
Remove content_length field from record
joost-j Mar 25, 2024
a124202
Apply suggestions from code review
joost-j Mar 25, 2024
dbaca5d
Change TabEditorTabRecord formatting
joost-j Mar 25, 2024
d66fa54
Black formatting, fix tests, add annotations import
joost-j Mar 25, 2024
bdaccbc
Bump cstruct version again
joost-j Mar 25, 2024
ad78273
Bump dependencies as leb128 is now included in dev release
joost-j Mar 28, 2024
0d9c88f
Implemented deletion of characters, refactored, added new tests
joost-j Mar 28, 2024
304db58
Small comment changes
joost-j Mar 28, 2024
2ca889c
Remove chunked addition of zero bytes
joost-j Mar 28, 2024
74ffb83
Added new test, changed to list insertion instead of appending
joost-j Mar 28, 2024
c148061
Refactored test file and removed fileState enum
joost-j Mar 28, 2024
2bf6e2f
Small comment changes/typos
joost-j Apr 11, 2024
a19c49b
Split plugin from parsing logic, added more tests
joost-j Apr 26, 2024
f808bc7
Removed fh.read() and re-added them to the c_def
joost-j Apr 26, 2024
9b38f3e
Added options and more test cases to support newest version
joost-j Apr 26, 2024
a3b6f27
Added separate records for unsaved/saved tabs, included more data (ti…
joost-j May 8, 2024
677817c
Change cstruct version
joost-j May 13, 2024
9674e37
Remove the --include-deleted-contents arg and make it default
joost-j Aug 14, 2024
06e3f07
Rewrite TabContent records into WindowsNotepadTab class
joost-j Aug 14, 2024
a384fd9
Implement repr for WindowsNotepadTab class
joost-j Aug 14, 2024
914c324
Merge branch 'main' into feature/windows_notepad_tabs
joost-j Aug 14, 2024
e625684
Add typehints and small fixes
Horofic Aug 16, 2024
9bb13c7
Merge branch 'main' into feature/windows_notepad_tabs
Horofic Aug 16, 2024
27fca92
Add suggestions
Horofic Aug 16, 2024
a9b32eb
Merge branch 'feature/windows_notepad_tabs' of github.com:joost-j/dis…
Horofic Aug 16, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
16 changes: 16 additions & 0 deletions dissect/target/plugins/apps/texteditor/texteditor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from dissect.target.helpers.descriptor_extensions import UserRecordDescriptorExtension
from dissect.target.helpers.record import create_extended_descriptor
from dissect.target.plugin import NamespacePlugin

# Record fields shared by the text editor tab records: the recovered tab
# content and the path of the file the content was recovered from.
GENERIC_TAB_CONTENTS_RECORD_FIELDS = [
("string", "content"),
("path", "path"),
]

# Generic tab record descriptor ("texteditor/tab"): the shared fields above,
# extended with the fields contributed by UserRecordDescriptorExtension.
TexteditorTabContentRecord = create_extended_descriptor([UserRecordDescriptorExtension])(
"texteditor/tab", GENERIC_TAB_CONTENTS_RECORD_FIELDS
)


class TexteditorPlugin(NamespacePlugin):
    """Namespace plugin grouping the text editor plugins under ``texteditor``."""

    __namespace__ = "texteditor"
213 changes: 213 additions & 0 deletions dissect/target/plugins/apps/texteditor/windowsnotepad.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
from __future__ import annotations

import zlib
from typing import Iterator

from dissect.cstruct import cstruct

from dissect.target.exceptions import UnsupportedPluginError
from dissect.target.helpers.descriptor_extensions import UserRecordDescriptorExtension
from dissect.target.helpers.fsutil import TargetPath
from dissect.target.helpers.record import (
UnixUserRecord,
WindowsUserRecord,
create_extended_descriptor,
)
from dissect.target.plugin import export
from dissect.target.plugins.apps.texteditor.texteditor import (
GENERIC_TAB_CONTENTS_RECORD_FIELDS,
TexteditorPlugin,
)

# Thanks to @Nordgaren, @daddycocoaman, @JustArion and @ogmini for their suggestions and feedback in the PR
# thread. This really helped to figure out the last missing bits and pieces
# required for recovering text from these files.

# cstruct definitions for the Windows Notepad TabState ``.bin`` files parsed below.
# Every file starts with ``header``; its ``fileState`` selects which of the two tab
# headers follows (``header_saved_tab`` or ``header_unsaved_tab``), after which one
# or more ``data_block`` structures hold the recorded text additions/deletions.
# NOTE(review): ``header`` is closed with ``}`` while the other structs end in ``};``
# - confirm the cstruct parser accepts the missing semicolon.
c_def = """
struct header {
char magic[2]; // NP
uint8 unk0;
uint8 fileState; // 0 if unsaved, 1 if saved
}

struct header_saved_tab {
uleb128 filePathLength;
wchar filePath[filePathLength];
uleb128 fileSize;
uleb128 encoding;
uleb128 carriageReturnType;
uleb128 timestamp; // Windows Filetime format (not unix timestamp)
char sha256[32];
char unk[6];
};

struct header_unsaved_tab {
uint8 unk0;
uleb128 fileSize;
uleb128 fileSizeDuplicate;
uint8 unk1;
uint8 unk2;
};

struct data_block {
uleb128 offset;
uleb128 nDeleted;
uleb128 nAdded;
wchar data[nAdded];
};
"""

# Parser instance that exposes the structures declared in c_def
# (c_windowstab.header, c_windowstab.data_block, ...).
c_windowstab = cstruct()
c_windowstab.load(c_def)

# Record emitted for each parsed Notepad tab ("texteditor/windowsnotepad/tab"):
# the generic tab fields extended with UserRecordDescriptorExtension.
TextEditorTabRecord = create_extended_descriptor([UserRecordDescriptorExtension])(
"texteditor/windowsnotepad/tab", GENERIC_TAB_CONTENTS_RECORD_FIELDS
)


def _calc_crc32(data: bytes) -> bytes:
"""Perform a CRC32 checksum on the data and return it as bytes."""
return zlib.crc32(data).to_bytes(length=4, byteorder="big")


class WindowsNotepadPlugin(TexteditorPlugin):
    """Windows notepad tab content plugin."""

    __namespace__ = "windowsnotepad"

    GLOB = "AppData/Local/Packages/Microsoft.WindowsNotepad_*/LocalState/TabState/*.bin"

    def __init__(self, target):
        super().__init__(target)
        # (tab file, owning user) pairs, collected for every user that has a home directory
        self.users_tabs: list[tuple[TargetPath, UnixUserRecord | WindowsUserRecord]] = []

        for user_details in self.target.user_details.all_with_home():
            for tab_file in user_details.home_path.glob(self.GLOB):
                # Files named *.0.bin / *.1.bin match the glob but are not parsed by this plugin
                if tab_file.name.endswith((".1.bin", ".0.bin")):
                    continue

                self.users_tabs.append((tab_file, user_details.user))

    def check_compatible(self) -> None:
        """Raise UnsupportedPluginError when no tab files were found for any user."""
        if not self.users_tabs:
            raise UnsupportedPluginError("No Windows Notepad temporary tab files found")

    def _process_tab_file(self, file: TargetPath, user: UnixUserRecord | WindowsUserRecord) -> TextEditorTabRecord:
        """Parse a binary tab file and reconstruct the contents.

        CRC32 mismatches are logged as warnings; the (possibly corrupt) content is still returned.

        Args:
            file: The binary file on disk that needs to be parsed.
            user: The user the tab file belongs to; attached to the returned record.

        Returns:
            A TextEditorTabRecord containing information that is in the tab.
        """
        with file.open("rb") as fh:
            # Header is the same for all types
            header = c_windowstab.header(fh)

            # File can be saved, or unsaved. Depending on the file state, different header fields are present
            # Currently, no information in the header is used in the outputted records, only the contents of the tab
            tab = (
                c_windowstab.header_saved_tab(fh)
                if header.fileState == 0x01  # 0x00 is unsaved, 0x01 is saved
                else c_windowstab.header_unsaved_tab(fh)
            )

            # In the case that the file size is known up front, then this fileSize is set to a nonzero value
            # This means that the data is stored in one block
            if tab.fileSize != 0:
                # So we only parse one block
                data_entry = c_windowstab.data_block(fh)

                # An extra byte is appended to the single block, not yet sure where this is defined and/or used for
                extra_byte = fh.read(1)

                # The CRC32 value is appended after the extra byte in big-endian
                defined_crc32 = fh.read(4)

                # The header (minus the magic) plus all data (including the extra byte) is included in the checksum
                actual_crc32 = _calc_crc32(header.dumps()[3:] + tab.dumps() + data_entry.dumps() + extra_byte)

                if defined_crc32 != actual_crc32:
                    self.target.log.warning(
                        "CRC32 mismatch in single-block file: %s (expected=%s, actual=%s)",
                        file.name,
                        defined_crc32.hex(),
                        actual_crc32.hex(),
                    )

                text = data_entry.data

            else:
                # Here, the fileSize is zeroed, meaning that the size is not known up front.
                # Data may be stored in multiple, variable-length blocks. This happens, for example, when several
                # additions and deletions of characters have been recorded and these changes have not been 'flushed'

                # First, parse 4 unknown bytes. These likely
                # hold some additional information about the tab (view options etc.)
                unknown_bytes = fh.read(4)

                # In this multi-block variant, the header itself has a CRC32 value in big-endian as well
                defined_header_crc32 = fh.read(4)

                # Calculate CRC32 of the header and check if it matches
                actual_header_crc32 = _calc_crc32(header.dumps()[3:] + tab.dumps() + unknown_bytes)
                if defined_header_crc32 != actual_header_crc32:
                    self.target.log.warning(
                        "CRC32 mismatch in header of multi-block file: %s expected=%s, actual=%s",
                        file.name,
                        defined_header_crc32.hex(),
                        actual_header_crc32.hex(),
                    )

                # Since we don't know the size of the file up front, and offsets don't necessarily have to be in order,
                # a list of characters is used to easily insert and delete text at offsets
                chars = []

                while True:
                    # Unfortunately, there is no way of determining how many blocks there are. So just try to parse
                    # until we reach EOF, after which we stop.
                    try:
                        data_entry = c_windowstab.data_block(fh)
                    except EOFError:
                        break

                    # Each block has a CRC32 value in big-endian appended to the block
                    defined_crc32 = fh.read(4)

                    # Either the nAdded is nonzero, or the nDeleted
                    if data_entry.nAdded > 0:
                        # Check the CRC32 checksum for this block
                        actual_crc32 = _calc_crc32(data_entry.dumps())
                        if defined_crc32 != actual_crc32:
                            # The stored checksum lives in defined_crc32; data_block has no crc32 field
                            self.target.log.warning(
                                "CRC32 mismatch in multi-block file: %s expected=%s, actual=%s",
                                file.name,
                                defined_crc32.hex(),
                                actual_crc32.hex(),
                            )

                        # Insert all added characters at the block's offset in one operation.
                        # An offset past the end of the list appends, same as list.insert() would.
                        chars[data_entry.offset : data_entry.offset] = data_entry.data

                    elif data_entry.nDeleted > 0:
                        # Drop the nDeleted characters that start at the offset
                        del chars[data_entry.offset : data_entry.offset + data_entry.nDeleted]

                # Join all the characters to reconstruct the original text
                text = "".join(chars)

        return TextEditorTabRecord(content=text, path=file, _target=self.target, _user=user)

    @export(record=TextEditorTabRecord)
    def tabs(self) -> Iterator[TextEditorTabRecord]:
        """Return contents from Windows 11 temporary Notepad tabs.

        Yields TextEditorTabRecord with the following fields:
            content (string): The contents of the tab.
            path (path): The path the content originates from.
        """
        for file, user in self.users_tabs:
            yield self._process_tab_file(file, user)
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ classifiers = [
]
dependencies = [
"defusedxml",
"dissect.cstruct>=3.0.dev,<4.0.dev",
"dissect.cstruct>=3.14.dev4,<4.0.dev",
"dissect.eventlog>=3.0.dev,<4.0.dev",
"dissect.evidence>=3.0.dev,<4.0.dev",
"dissect.hypervisor>=3.0.dev,<4.0.dev",
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Empty file.
73 changes: 73 additions & 0 deletions tests/plugins/apps/texteditor/test_texteditor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import os

from dissect.target.plugins.apps.texteditor import windowsnotepad
from tests._utils import absolute_path

# Expected tab contents for the fixture .bin files; test_texteditor_plugin maps each
# fixture file name to one of these values in its file_text_map.
text1 = "This is an unsaved tab, UTF-8 encoded with Windows (CRLF). It's only 88 characters long."
text2 = (
"Dissect は、インシデント対応のための優れたフレームワークです。 The Notepad window shows UTF-8 as the encoding. This text has 113 "
"characters."
)
text3 = "This is a very short text."
text4 = "This is another short test. And we should be able to parse this."
# text5 and text6 are repeated (5 and 1260 times respectively) when used as expected values.
text5 = "This is a test and the text is longer than 256 bytes. "
text6 = "This is a test and the text is longer than 65536 bytes. "
text7 = (
"This a text, which is nothing special. But I am going to modify it a bit. For example, "
"I have removed quote some stuff. Adding a word in the beginning now... "
"At this point, I've edited it quite a lot."
)
text8 = (
"Closing application now. It's saved but now I'm adding unsaved changes and closing "
"the application again. Dit a few deletions!"
)
loremipsum = """Lorem ipsum dolor sit amet. Eum error blanditiis eum pariatur delectus ut consequuntur officiis a excepturi dignissimos et doloribus quia 33 perspiciatis soluta nam perspiciatis dolor. Ut repudiandae quidem cum sint modi qui sint consequatur. Aut autem quidem eum enim consequatur qui voluptate consequatur non similique voluptate. A vitae modi vel sint provident ut galisum tenetur sit voluptatem amet. Est impedit perspiciatis est repudiandae voluptates ut fugit alias! Eum magni esse aut velit illum qui excepturi aperiam. Ex dolores asperiores ut debitis omnis qui consequuntur dolore. Est voluptatem mollitia et quibusdam unde ea accusamus fuga. Cum quis galisum et impedit sunt qui aliquam perspiciatis sed modi quidem qui nisi molestias. Aut temporibus architecto ut neque voluptatem et consequatur deleniti sed accusantium quibusdam et omnis dignissimos ad rerum ipsam et rerum quia. Ut nihil repellat et eaque molestias quo iusto ipsum At optio sint eos quidem earum?\r\rEx deleniti unde eum tenetur rerum ea dolore numquam? Eos aperiam officiis et neque explicabo et enim atque ut eaque omnis non illum eveniet est molestias itaque et ratione voluptatem. Ea deserunt nemo et quos tempora et nostrum aperiam sit necessitatibus illo sit culpa placeat. Vel tempore quibusdam ut velit voluptate aut odio facere non voluptas earum est odio galisum et voluptas harum. Et blanditiis sapiente et nostrum laborum aut voluptatem explicabo a quasi assumenda. Est voluptatem quia eum minima galisum quo totam excepturi aut facilis enim vel voluptate repudiandae sit distinctio laboriosam. Quo possimus molestiae et molestiae accusantium est voluptas omnis sed obcaecati natus. Non vitae asperiores qui nostrum enim id saepe fugiat et incidunt quasi.\r\rEos ipsa facilis aut excepturi voluptatem a omnis magni vel magni iste. Sed ipsum consequatur qui reprehenderit deleniti et soluta molestiae. 
Ut vero assumenda id dolor ipsum in deleniti voluptatem aut quis quisquam sed repudiandae temporibus ab quia inventore. Sed velit fugit vel facere cumque et delectus ullam sed eaque impedit. Est veritatis dignissimos aut doloribus dolorem vel pariatur repellendus sit nesciunt similique eum architecto quia. Ea expedita veritatis eum dolorem molestiae ut enim fugit aut beatae quibusdam. Aut voluptas natus in quidem deleniti aut animi iure est incidunt tenetur qui culpa maiores! Et nostrum quaerat qui consequatur consequatur aut aliquam atque aut praesentium rerum et consequuntur exercitationem. Non accusantium ipsa vel consectetur vitae ut magnam autem et natus rerum ut consectetur inventore est doloremque temporibus 33 dolores doloribus! Aut perferendis optio et nostrum repellendus et fugit itaque ut nisi neque sed sint quaerat. Aut placeat architecto et eius sapiente eum molestiae quam. Quo mollitia sapiente non Quis neque non tempora laudantium. Quo distinctio quos et molestias natus sit veritatis consequuntur aut repellendus neque a porro galisum cum numquam nesciunt et animi earum? Aut dolorum dolore non assumenda omnis et molestiae amet id sint vero est eligendi harum sit temporibus magnam aut ipsam quos.\r\r""" # noqa: E501


def test_texteditor_plugin(target_win, fs_win, tmp_path, target_win_users, caplog):
    """Map the Notepad tab fixtures into John's profile and check the recovered tab contents."""
    # Fixture file name -> text expected to be recovered from that file
    file_text_map = {
        "c515e86f-08b3-4d76-844a-cddfcd43fcbb.bin": text1,
        "85167c9d-aac2-4469-ae44-db5dccf8f7f4.bin": text2,
        "dae80df8-e1e5-4996-87fe-b453f63fcb19.bin": text3,
        "3f915e17-cf6c-462b-9bd1-2f23314cb979.bin": text4,
        "ba291ccd-f1c3-4ca8-949c-c01f6633789d.bin": (text5 * 5),
        "e609218e-94f2-45fa-84e2-f29df2190b26.bin": (text6 * 1260),
        "3d0cc86e-dfc9-4f16-b74a-918c2c24188c.bin": loremipsum,
        "wrong-checksum.bin": text4,  # only added to check for corrupt checksum, not validity
        "cfe38135-9dca-4480-944f-d5ea0e1e589f.bin": (loremipsum * 37)[:-2],  # removed the two newlines in this file
        "saved.bin": "Saved!",
        "unsaved.bin": "Not saved at all",
        "unsaved-with-deletions.bin": "Not saved aasdflasd",
        "lots-of-deletions.bin": text7,
        "appclosed_saved_and_deletions.bin": text8,
        "appclosed_unsaved.bin": "Closing application now",
    }

    tabcache = absolute_path("_data/plugins/apps/texteditor/windowsnotepad/")

    user = target_win_users.user_details.find(username="John")
    tab_dir = user.home_path.joinpath(
        "AppData/Local/Packages/Microsoft.WindowsNotepad_8wekyb3d8bbwe/LocalState/TabState"
    )

    fs_win.map_dir("Users\\John", tmp_path)

    for file in file_text_map:
        # Drop the first three characters of the path before mapping
        # (presumably the drive prefix - verify against the fs_win fixture)
        tab_file = str(tab_dir.joinpath(file))[3:]
        fs_win.map_file(tab_file, os.path.join(tabcache, file))

    target_win.add_plugin(windowsnotepad.WindowsNotepadPlugin)

    records = list(target_win.windowsnotepad.tabs())

    # Check the amount of files
    assert len(list(tab_dir.iterdir())) == len(file_text_map)
    assert len(records) == len(file_text_map)

    # One file should still return contents, but there should be an entry in the logging for a CRC mismatch.
    assert "CRC32 mismatch in single-block file: wrong-checksum.bin (expected=deadbeef, actual=a48d30a6)" in caplog.text

    # The recovered content in the records should match the original data, as well as the length
    for rec in records:
        assert rec.content == file_text_map[rec.path.name]
        assert len(rec.content) == len(file_text_map[rec.path.name])