Skip to content

Commit df9feab

Browse files
committed
add reflink-based dedup; closes #201
1 parent 674fc1f commit df9feab

File tree

6 files changed

+25
-3
lines changed

6 files changed

+25
-3
lines changed

README.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1439,12 +1439,17 @@ if you enable deduplication with `--dedup` then it'll create a symlink instead o
14391439
**warning:** when enabling dedup, you should also:
14401440
* enable indexing with `-e2dsa` or volflag `e2dsa` (see [file indexing](#file-indexing) section below); strongly recommended
14411441
* ...and/or `--hardlink-only` to use hardlink-based deduplication instead of symlinks; see explanation below
1442+
* ...and/or `--reflink` to use CoW/reflink-based dedup (much safer than hardlink, but OS/FS-dependent)
14421443
14431444
it will not be safe to rename/delete files if you only enable dedup and none of the above; if you enable indexing then it is not *necessary* to also do hardlinks (but you may still want to)
14441445
14451446
by default, deduplication is done based on symlinks (symbolic links); these are tiny files which are pointers to the nearest full copy of the file
14461447
1447-
you can choose to use hardlinks instead of softlinks, globally with `--hardlink-only` or volflag `hardlinkonly`;
1448+
you can choose to use hardlinks instead of softlinks, globally with `--hardlink-only` or volflag `hardlinkonly`, and you can choose to use reflinks with `--reflink` or volflag `reflink`
1449+
1450+
advantages of using reflinks (CoW, copy-on-write):
1451+
* entirely safe (when your filesystem supports it correctly); either file can be edited or deleted without affecting other copies
1452+
* caveat: requires linux 5.3 or newer and python 3.14 or newer, and only works on some filesystems (btrfs probably ok, maybe xfs too, but zfs had bugs)
14481453
14491454
advantages of using hardlinks:
14501455
* hardlinks are more compatible with other software; they behave entirely like regular files

copyparty/__main__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1056,6 +1056,7 @@ def add_upload(ap):
10561056
ap2.add_argument("--safe-dedup", metavar="N", type=int, default=50, help="how careful to be when deduplicating files; [\033[32m1\033[0m] = just verify the filesize, [\033[32m50\033[0m] = verify file contents have not been altered (volflag=safededup)")
10571057
ap2.add_argument("--hardlink", action="store_true", help="enable hardlink-based dedup; will fallback on symlinks when that is impossible (across filesystems) (volflag=hardlink)")
10581058
ap2.add_argument("--hardlink-only", action="store_true", help="do not fallback to symlinks when a hardlink cannot be made (volflag=hardlinkonly)")
1059+
ap2.add_argument("--reflink", action="store_true", help="enable reflink-based dedup; will fallback on full copies when that is impossible (non-CoW filesystem) (volflag=reflink)")
10591060
ap2.add_argument("--no-dupe", action="store_true", help="reject duplicate files during upload; only matches within the same volume (volflag=nodupe)")
10601061
ap2.add_argument("--no-clone", action="store_true", help="do not use existing data on disk to satisfy dupe uploads; reduces server HDD reads in exchange for much more network load (volflag=noclone)")
10611062
ap2.add_argument("--no-snap", action="store_true", help="disable snapshots -- forget unfinished uploads on shutdown; don't create .hist/up2k.snap files -- abandoned/interrupted uploads must be cleaned up manually")

copyparty/authsrv.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2124,6 +2124,7 @@ def _reload(self, verbosity: int = 9) -> None:
21242124
all_mte = {}
21252125
errors = False
21262126
free_umask = False
2127+
have_reflink = False
21272128
for vol in vfs.all_nodes.values():
21282129
if (self.args.e2ds and vol.axs.uwrite) or self.args.e2dsa:
21292130
vol.flags["e2ds"] = True
@@ -2207,6 +2208,9 @@ def _reload(self, verbosity: int = 9) -> None:
22072208
if "unlistcr" in vol.flags or "unlistcw" in vol.flags:
22082209
self.args.have_unlistc = True
22092210

2211+
if "reflink" in vol.flags:
2212+
have_reflink = True
2213+
22102214
zs = str(vol.flags.get("tcolor", "")).lstrip("#")
22112215
if len(zs) == 3: # fc5 => ffcc55
22122216
vol.flags["tcolor"] = "".join([x * 2 for x in zs])
@@ -2571,6 +2575,13 @@ def _reload(self, verbosity: int = 9) -> None:
25712575
t = "WARNING! The following IdP volumes are mounted below another volume where other users can read and/or write files. This is a SECURITY HAZARD!! When copyparty is restarted, it will not know about these IdP volumes yet. These volumes will then be accessible by an unexpected set of permissions UNTIL one of the users associated with their volume sends a request to the server. RECOMMENDATION: You should create a restricted volume where nobody can read/write files, and make sure that all IdP volumes are configured to appear somewhere below that volume."
25722576
self.log(t + "".join(self.idp_err), 1)
25732577

2578+
if have_reflink:
2579+
t = "WARNING: Reflink-based dedup was requested, but %s. This will not work; files will be full copies instead."
2580+
if sys.version_info < (3, 14):
2581+
self.log(t % "your python version is not new enough", 1)
2582+
if not sys.platform.startswith("linux"):
2583+
self.log(t % "your OS is not Linux", 1)
2584+
25742585
self.vfs = vfs
25752586
self.acct = acct
25762587
self.defpw = defpw

copyparty/cfg.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ def vf_bmap() -> dict[str, str]:
5252
"og_no_head",
5353
"og_s_title",
5454
"rand",
55+
"reflink",
5556
"rmagic",
5657
"rss",
5758
"wo_up_readme",
@@ -168,6 +169,7 @@ def vf_cmap() -> dict[str, str]:
168169
"dedup": "enable symlink-based file deduplication",
169170
"hardlink": "enable hardlink-based file deduplication,\nwith fallback on symlinks when that is impossible",
170171
"hardlinkonly": "dedup with hardlink only, never symlink;\nmake a full copy if hardlink is impossible",
172+
"reflink": "enable reflink-based file deduplication,\nwith fallback on full copy when that is impossible",
171173
"safededup": "verify on-disk data before using it for dedup",
172174
"noclone": "take dupe data from clients, even if available on HDD",
173175
"nodupe": "rejects existing files (instead of linking/cloning them)",

copyparty/up2k.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3476,6 +3476,8 @@ def _symlink(
34763476

34773477
linked = False
34783478
try:
3479+
if "reflink" in flags:
3480+
raise Exception("reflink")
34793481
if not is_mv and not flags.get("dedup"):
34803482
raise Exception("dedup is disabled in config")
34813483

@@ -3532,7 +3534,8 @@ def _symlink(
35323534

35333535
linked = True
35343536
except Exception as ex:
3535-
self.log("cannot link; creating copy: " + repr(ex))
3537+
if str(ex) != "reflink":
3538+
self.log("cannot link; creating copy: " + repr(ex))
35363539
if bos.path.isfile(src):
35373540
csrc = src
35383541
elif fsrc and bos.path.isfile(fsrc):

tests/util.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ class Cfg(Namespace):
143143
def __init__(self, a=None, v=None, c=None, **ka0):
144144
ka = {}
145145

146-
ex = "chpw cookie_lax daw dav_auth dav_mac dav_rt e2d e2ds e2dsa e2t e2ts e2tsr e2v e2vu e2vp early_ban ed emp exp force_js getmod grid gsel hardlink hardlink_only ih ihead magic nid nih no_acode no_athumb no_bauth no_clone no_cp no_dav no_db_ip no_del no_dirsz no_dupe no_lifetime no_logues no_mv no_pipe no_poll no_readme no_robots no_sb_md no_sb_lg no_scandir no_tail no_tarcmp no_thumb no_vthumb no_zip nrand nsort nw og og_no_head og_s_title ohead q rand re_dirsz rmagic rss smb srch_dbg srch_excl stats uqe vague_403 vc ver wo_up_readme write_uplog xdev xlink xvol zipmaxu zs"
146+
ex = "chpw cookie_lax daw dav_auth dav_mac dav_rt e2d e2ds e2dsa e2t e2ts e2tsr e2v e2vu e2vp early_ban ed emp exp force_js getmod grid gsel hardlink hardlink_only ih ihead magic nid nih no_acode no_athumb no_bauth no_clone no_cp no_dav no_db_ip no_del no_dirsz no_dupe no_lifetime no_logues no_mv no_pipe no_poll no_readme no_robots no_sb_md no_sb_lg no_scandir no_tail no_tarcmp no_thumb no_vthumb no_zip nrand nsort nw og og_no_head og_s_title ohead q rand re_dirsz reflink rmagic rss smb srch_dbg srch_excl stats uqe vague_403 vc ver wo_up_readme write_uplog xdev xlink xvol zipmaxu zs"
147147
ka.update(**{k: False for k in ex.split()})
148148

149149
ex = "dav_inf dedup dotpart dotsrch hook_v no_dhash no_fastboot no_fpool no_htp no_rescan no_sendfile no_ses no_snap no_up_list no_voldump re_dhash see_dots plain_ip"

0 commit comments

Comments
 (0)