Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/18527.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add ability to limit amount uploaded by a user in a given time period.
17 changes: 17 additions & 0 deletions docs/usage/configuration/config_documentation.md
Original file line number Diff line number Diff line change
Expand Up @@ -2058,6 +2058,23 @@ Example configuration:
max_upload_size: 60M
```
---
### `media_upload_limits`

*(array)* A list of media upload limits defining how much data a given user can upload in a given time period.

An empty list means no limits are applied.

Defaults to `[]`.

Example configuration:
```yaml
media_upload_limits:
- time_period: 1h
max_size: 100M
- time_period: 1w
max_size: 500M
```
---
### `max_image_pixels`

*(byte size)* Maximum number of pixels that will be thumbnailed. Defaults to `"32M"`.
Expand Down
24 changes: 24 additions & 0 deletions schema/synapse-config.schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2300,6 +2300,30 @@ properties:
default: 50M
examples:
- 60M
media_upload_limits:
type: array
description: >-
A list of media upload limits defining how much data a given user can
upload in a given time period.


An empty list means no limits are applied.
default: []
items:
time_period:
type: "#/$defs/duration"
description: >-
The time period over which the limit applies. Required.
max_size:
type: "#/$defs/bytes"
description: >-
Amount of data that can be uploaded in the time period by the user.
Required.
examples:
- - time_period: 1h
max_size: 100M
- time_period: 1w
max_size: 500M
max_image_pixels:
$ref: "#/$defs/bytes"
description: Maximum number of pixels that will be thumbnailed.
Expand Down
16 changes: 16 additions & 0 deletions synapse/config/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,15 @@ def parse_thumbnail_requirements(
}


@attr.s(auto_attribs=True, slots=True, frozen=True)
class MediaUploadLimit:
"""A limit on the amount of data a user can upload in a given time
period."""

max_bytes: int
time_period_ms: int


class ContentRepositoryConfig(Config):
section = "media"

Expand Down Expand Up @@ -274,6 +283,13 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None:

self.enable_authenticated_media = config.get("enable_authenticated_media", True)

self.media_upload_limits: List[MediaUploadLimit] = []
for limit_config in config.get("media_upload_limits", []):
time_period_ms = self.parse_duration(limit_config["time_period"])
max_bytes = self.parse_size(limit_config["max_size"])

self.media_upload_limits.append(MediaUploadLimit(max_bytes, time_period_ms))

def generate_config_section(self, data_dir_path: str, **kwargs: Any) -> str:
assert data_dir_path is not None
media_store = os.path.join(data_dir_path, "media_store")
Expand Down
2 changes: 1 addition & 1 deletion synapse/handlers/sso.py
Original file line number Diff line number Diff line change
Expand Up @@ -824,7 +824,7 @@ def is_allowed_mime_type(content_type: str) -> bool:
return True

# store it in media repository
avatar_mxc_url = await self._media_repo.create_content(
avatar_mxc_url = await self._media_repo.create_or_update_content(
media_type=headers[b"Content-Type"][0].decode("utf-8"),
upload_name=upload_name,
content=picture,
Expand Down
128 changes: 66 additions & 62 deletions synapse/media/media_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,13 @@ def __init__(self, hs: "HomeServer"):
else:
self.url_previewer = None

# We get the media upload limits and sort them in descending order of
# time period, so that we can apply some optimizations.
self.media_upload_limits = hs.config.media.media_upload_limits
self.media_upload_limits.sort(
key=lambda limit: limit.time_period_ms, reverse=True
)

def _start_update_recently_accessed(self) -> Deferred:
return run_as_background_process(
"update_recently_accessed_media", self._update_recently_accessed
Expand Down Expand Up @@ -285,80 +292,37 @@ async def verify_can_upload(self, media_id: str, auth_user: UserID) -> None:
raise NotFoundError("Media ID has expired")

@trace
async def update_content(
self,
media_id: str,
media_type: str,
upload_name: Optional[str],
content: IO,
content_length: int,
auth_user: UserID,
) -> None:
"""Update the content of the given media ID.

Args:
media_id: The media ID to replace.
media_type: The content type of the file.
upload_name: The name of the file, if provided.
content: A file like object that is the content to store
content_length: The length of the content
auth_user: The user_id of the uploader
"""
file_info = FileInfo(server_name=None, file_id=media_id)
sha256reader = SHA256TransparentIOReader(content)
# This implements all of IO as it has a passthrough
fname = await self.media_storage.store_file(sha256reader.wrap(), file_info)
sha256 = sha256reader.hexdigest()
should_quarantine = await self.store.get_is_hash_quarantined(sha256)
logger.info("Stored local media in file %r", fname)

if should_quarantine:
logger.warn(
"Media has been automatically quarantined as it matched existing quarantined media"
)

await self.store.update_local_media(
media_id=media_id,
media_type=media_type,
upload_name=upload_name,
media_length=content_length,
user_id=auth_user,
sha256=sha256,
quarantined_by="system" if should_quarantine else None,
)

try:
await self._generate_thumbnails(None, media_id, media_id, media_type)
except Exception as e:
logger.info("Failed to generate thumbnails: %s", e)

@trace
async def create_content(
async def create_or_update_content(
self,
media_type: str,
upload_name: Optional[str],
content: IO,
content_length: int,
auth_user: UserID,
media_id: Optional[str] = None,
) -> MXCUri:
"""Store uploaded content for a local user and return the mxc URL
"""Create or update the content of the given media ID.

Args:
media_type: The content type of the file.
upload_name: The name of the file, if provided.
content: A file like object that is the content to store
content_length: The length of the content
auth_user: The user_id of the uploader
media_id: The media ID to update if provided, otherwise creates
new media ID.

Returns:
The mxc url of the stored content
"""

media_id = random_string(24)
is_new_media = media_id is None
if media_id is None:
media_id = random_string(24)

file_info = FileInfo(server_name=None, file_id=media_id)
# This implements all of IO as it has a passthrough
sha256reader = SHA256TransparentIOReader(content)
# This implements all of IO as it has a passthrough
fname = await self.media_storage.store_file(sha256reader.wrap(), file_info)
sha256 = sha256reader.hexdigest()
should_quarantine = await self.store.get_is_hash_quarantined(sha256)
Expand All @@ -370,16 +334,56 @@ async def create_content(
"Media has been automatically quarantined as it matched existing quarantined media"
)

await self.store.store_local_media(
media_id=media_id,
media_type=media_type,
time_now_ms=self.clock.time_msec(),
upload_name=upload_name,
media_length=content_length,
user_id=auth_user,
sha256=sha256,
quarantined_by="system" if should_quarantine else None,
)
# Check that the user has not exceeded any of the media upload limits.

# This is the total size of media uploaded by the user in the last
# `time_period_ms` milliseconds, or None if we haven't checked yet.
uploaded_media_size: Optional[int] = None

# Note: the media upload limits are sorted so larger time periods are
# first.
for limit in self.media_upload_limits:
# We only need to check the amount of media uploaded by the user in
# this latest (smaller) time period if the amount of media uploaded
# in a previous (larger) time period is above the limit.
#
# This optimization means that in the common case where the user
# hasn't uploaded much media, we only need to query the database
# once.
if (
uploaded_media_size is None
or uploaded_media_size + content_length > limit.max_bytes
):
uploaded_media_size = await self.store.get_media_uploaded_size_for_user(
user_id=auth_user.to_string(), time_period_ms=limit.time_period_ms
)

if uploaded_media_size + content_length > limit.max_bytes:
raise SynapseError(
400, "Media upload limit exceeded", Codes.RESOURCE_LIMIT_EXCEEDED
)

if is_new_media:
await self.store.store_local_media(
media_id=media_id,
media_type=media_type,
time_now_ms=self.clock.time_msec(),
upload_name=upload_name,
media_length=content_length,
user_id=auth_user,
sha256=sha256,
quarantined_by="system" if should_quarantine else None,
)
else:
await self.store.update_local_media(
media_id=media_id,
media_type=media_type,
upload_name=upload_name,
media_length=content_length,
user_id=auth_user,
sha256=sha256,
quarantined_by="system" if should_quarantine else None,
)

try:
await self._generate_thumbnails(None, media_id, media_id, media_type)
Expand Down
6 changes: 3 additions & 3 deletions synapse/rest/media/upload_resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ async def on_POST(self, request: SynapseRequest) -> None:

try:
content: IO = request.content # type: ignore
content_uri = await self.media_repo.create_content(
content_uri = await self.media_repo.create_or_update_content(
media_type, upload_name, content, content_length, requester.user
)
except SpamMediaException:
Expand Down Expand Up @@ -170,13 +170,13 @@ async def on_PUT(

try:
content: IO = request.content # type: ignore
await self.media_repo.update_content(
media_id,
await self.media_repo.create_or_update_content(
media_type,
upload_name,
content,
content_length,
requester.user,
media_id=media_id,
)
except SpamMediaException:
# For uploading of media we want to respond with a 400, instead of
Expand Down
36 changes: 36 additions & 0 deletions synapse/storage/databases/main/media_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -1034,3 +1034,39 @@ def get_matching_media_txn(
"local_media_repository",
sha256,
)

async def get_media_uploaded_size_for_user(
self, user_id: str, time_period_ms: int
) -> int:
"""Get the total size of media uploaded by a user in the last
time_period_ms milliseconds.

Args:
user_id: The user ID to check.
time_period_ms: The time period in milliseconds to consider.

Returns:
The total size of media uploaded by the user in bytes.
"""

sql = """
SELECT COALESCE(SUM(media_length), 0)
FROM local_media_repository
WHERE user_id = ? AND created_ts > ?
Comment on lines +1053 to +1055
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This hits the users_have_local_media index which was introduced 5 years ago, verified on m.org that it should be performant

"""

def _get_media_uploaded_size_for_user_txn(
txn: LoggingTransaction,
) -> int:
# Calculate the timestamp for the start of the time period
start_ts = self._clock.time_msec() - time_period_ms
txn.execute(sql, (user_id, start_ts))
row = txn.fetchone()
if row is None:
return 0
return row[0]

return await self.db_pool.runInteraction(
"get_media_uploaded_size_for_user",
_get_media_uploaded_size_for_user_txn,
)
10 changes: 5 additions & 5 deletions tests/federation/test_federation_media.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
def test_file_download(self) -> None:
content = io.BytesIO(b"file_to_stream")
content_uri = self.get_success(
self.media_repo.create_content(
self.media_repo.create_or_update_content(
"text/plain",
"test_upload",
content,
Expand Down Expand Up @@ -110,7 +110,7 @@ def test_file_download(self) -> None:

content = io.BytesIO(SMALL_PNG)
content_uri = self.get_success(
self.media_repo.create_content(
self.media_repo.create_or_update_content(
"image/png",
"test_png_upload",
content,
Expand Down Expand Up @@ -152,7 +152,7 @@ def test_federation_etag(self) -> None:

content = io.BytesIO(b"file_to_stream")
content_uri = self.get_success(
self.media_repo.create_content(
self.media_repo.create_or_update_content(
"text/plain",
"test_upload",
content,
Expand Down Expand Up @@ -215,7 +215,7 @@ def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
def test_thumbnail_download_scaled(self) -> None:
content = io.BytesIO(small_png.data)
content_uri = self.get_success(
self.media_repo.create_content(
self.media_repo.create_or_update_content(
"image/png",
"test_png_thumbnail",
content,
Expand Down Expand Up @@ -255,7 +255,7 @@ def test_thumbnail_download_scaled(self) -> None:
def test_thumbnail_download_cropped(self) -> None:
content = io.BytesIO(small_png.data)
content_uri = self.get_success(
self.media_repo.create_content(
self.media_repo.create_or_update_content(
"image/png",
"test_png_thumbnail",
content,
Expand Down
2 changes: 1 addition & 1 deletion tests/media/test_media_retention.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def _create_media_and_set_attributes(
# If the meda
random_content = bytes(random_string(24), "utf-8")
mxc_uri: MXCUri = self.get_success(
media_repository.create_content(
media_repository.create_or_update_content(
media_type="text/plain",
upload_name=None,
content=io.BytesIO(random_content),
Expand Down
Loading
Loading