Logic for Healing of Sparse Files #4554

sbk173 · 2025-06-02T05:58:31Z

Problem:
Sparse files were not handled in the healing logic, causing them to occupy their full size after healing.

Fix:
Similar to how sparse files are handled in dht-rebalance, holes and data regions are found using ec_seek() and only the data segments are written to the healed file.

Fixes: #3546

Problem: Sparse files were not handled in the healing logic, causing them to occupy their full size after healing. Fix: Similar to how sparse files are handled in dht-rebalance, holes and data regions are found using ec_seek() and only the data segments are written to the healed file. Fixes: gluster#3546 Signed-off-by: sbk173 <[email protected]>

gluster-ant · 2025-06-02T05:58:33Z

Can one of the admins verify this patch?

gluster-ant · 2025-06-02T05:58:33Z

Can one of the admins verify this patch?

gluster-ant · 2025-06-02T05:59:48Z

CLANG-FORMAT FAILURE:
Before merging the patch, this diff needs to be considered for passing clang-format

index 7daa05e84..d5982fa6d 100644
--- a/xlators/cluster/ec/src/ec-heal.c
+++ b/xlators/cluster/ec/src/ec-heal.c
@@ -2046,7 +2046,8 @@ ec_sync_heal_block(call_frame_t *frame, xlator_t *this, ec_heal_t *heal)
 
 void
 ec_heal_seek_hole_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
-    int32_t op_ret, int32_t op_errno, off_t offset, dict_t *xdata)
+                      int32_t op_ret, int32_t op_errno, off_t offset,
+                      dict_t *xdata)
 {
     ec_fop_data_t *fop = cookie;
     ec_heal_t *heal = fop->data;
@@ -2063,12 +2064,13 @@ out:
 
 void
 ec_heal_seek_data_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
-		    int32_t op_ret, int32_t op_errno, off_t offset, dict_t * xdata)
+                      int32_t op_ret, int32_t op_errno, off_t offset,
+                      dict_t *xdata)
 {
     ec_fop_data_t *fop = cookie;
     ec_heal_t *heal = fop->data;
 
-    if (op_ret < 0){
+    if (op_ret < 0) {
         heal->done = _gf_true;
         goto out;
     }
@@ -2082,36 +2084,33 @@ int32_t
 ec_sync_heal_sparse_region(call_frame_t *frame, ec_t *ec, ec_heal_t *heal)
 {
     int ret = 0;
-    ec_seek(frame, ec->xl, heal->good, EC_MINIMUM_ONE,
-            ec_heal_seek_data_cbk, heal, heal->fd, heal->offset,
-            GF_SEEK_DATA, NULL);
+    ec_seek(frame, ec->xl, heal->good, EC_MINIMUM_ONE, ec_heal_seek_data_cbk,
+            heal, heal->fd, heal->offset, GF_SEEK_DATA, NULL);
     syncbarrier_wait(&heal->barrier, 1);
 
     if (heal->done)
         goto out;
 
-    ec_seek(frame, ec->xl, heal->good, EC_MINIMUM_ONE,
-            ec_heal_seek_hole_cbk, heal, heal->fd, heal->offset,
-            GF_SEEK_HOLE, NULL);
-    syncbarrier_wait(&heal->barrier,1);
-    
+    ec_seek(frame, ec->xl, heal->good, EC_MINIMUM_ONE, ec_heal_seek_hole_cbk,
+            heal, heal->fd, heal->offset, GF_SEEK_HOLE, NULL);
+    syncbarrier_wait(&heal->barrier, 1);
+
     if (heal->error != 0) {
         ret = -heal->error;
         goto out;
     }
 
     for (; (heal->offset < heal->hole_offset) && (!heal->done);
-            heal->offset += heal->size) {
-
+         heal->offset += heal->size) {
         uint64_t data_block_size = heal->hole_offset - heal->offset;
-        data_block_size = (data_block_size > ec->stripe_size)?
-                            data_block_size : ec->stripe_size;
+        data_block_size = (data_block_size > ec->stripe_size) ? data_block_size
+                                                              : ec->stripe_size;
 
         uint64_t original_heal_size = heal->size;
 
-        if(data_block_size < heal->size)
+        if (data_block_size < heal->size)
             heal->size = data_block_size;
-        
+
         ret = ec_sync_heal_block(frame, ec->xl, heal);
         if (ret < 0)
             goto out;
@@ -2122,12 +2121,12 @@ ec_sync_heal_sparse_region(call_frame_t *frame, ec_t *ec, ec_heal_t *heal)
 
 out:
     return ret;
-    
 }
 
 int
 ec_rebuild_data(call_frame_t *frame, ec_t *ec, fd_t *fd, uint64_t size,
-                unsigned char *sources, unsigned char *healed_sinks, int hole_exists)
+                unsigned char *sources, unsigned char *healed_sinks,
+                int hole_exists)
 {
     ec_heal_t obj, *heal = &obj;
     int ret = 0;
@@ -2154,45 +2153,43 @@ ec_rebuild_data(call_frame_t *frame, ec_t *ec, fd_t *fd, uint64_t size,
 
     if (!hole_exists) {
         for (heal->offset = 0; (heal->offset < size) && !heal->done;
-            heal->offset += heal->size) {
+             heal->offset += heal->size) {
             /* We immediately abort any heal if a shutdown request has been
-            * received to avoid delays. The healing of this file will be
-            * restarted by another SHD or other client that accesses the
-            * file. */
+             * received to avoid delays. The healing of this file will be
+             * restarted by another SHD or other client that accesses the
+             * file. */
             if (ec->shutdown) {
                 gf_msg_debug(ec->xl->name, 0,
-                            "Cancelling heal because "
-                            "EC is stopping.");
+                             "Cancelling heal because "
+                             "EC is stopping.");
                 ret = -ENOTCONN;
                 break;
             }
 
-            gf_msg_debug(ec->xl->name, 0,
-                        "%s: sources: %d, sinks: "
-                        "%d, offset: %" PRIu64 " bsize: %" PRIu64,
-                        uuid_utoa(fd->inode->gfid), EC_COUNT(sources, ec->nodes),
-                        EC_COUNT(healed_sinks, ec->nodes), heal->offset,
-                        heal->size);
+            gf_msg_debug(
+                ec->xl->name, 0,
+                "%s: sources: %d, sinks: "
+                "%d, offset: %" PRIu64 " bsize: %" PRIu64,
+                uuid_utoa(fd->inode->gfid), EC_COUNT(sources, ec->nodes),
+                EC_COUNT(healed_sinks, ec->nodes), heal->offset, heal->size);
             ret = ec_sync_heal_block(frame, ec->xl, heal);
             if (ret < 0)
                 break;
         }
-    
+
     } else {
         heal->offset = 0;
         while (!heal->done) {
             if (ec->shutdown) {
                 gf_msg_debug(ec->xl->name, 0,
-                            "Cancelling heal because "
-                            "EC is stopping.");
+                             "Cancelling heal because "
+                             "EC is stopping.");
                 ret = -ENOTCONN;
-    
             }
             ret = ec_sync_heal_sparse_region(frame, ec, heal);
             if (ret < 0)
                 break;
         }
-
     }
     memset(healed_sinks, 0, ec->nodes);
     ec_mask_to_char_array(heal->bad, healed_sinks, ec->nodes);
@@ -2201,7 +2198,7 @@ ec_rebuild_data(call_frame_t *frame, ec_t *ec, fd_t *fd, uint64_t size,
     syncbarrier_destroy(&heal->barrier);
     if (ret < 0)
         gf_msg_debug(ec->xl->name, -ret, "%s: heal failed",
-                    uuid_utoa(fd->inode->gfid));
+                     uuid_utoa(fd->inode->gfid));
     return ret;
 }
 
@@ -2475,12 +2472,14 @@ __ec_heal_data(call_frame_t *frame, ec_t *ec, fd_t *fd, unsigned char *heal_on,
         }
 
         ret = __ec_heal_data_prepare(frame, ec, fd, locked_on, versions, dirty,
-                                     size, sources, healed_sinks, trim, &source_buf);
+                                     size, sources, healed_sinks, trim,
+                                     &source_buf);
         if (ret < 0)
             goto unlock;
-        
-        if (source_buf.ia_blocks * source_buf.ia_blksize != source_buf.ia_size) {
-                file_has_holes = 1;
+
+        if (source_buf.ia_blocks * source_buf.ia_blksize !=
+            source_buf.ia_size) {
+            file_has_holes = 1;
         }
 
         if (EC_COUNT(healed_sinks, ec->nodes) == 0) {
@@ -2512,7 +2511,8 @@ unlock:
                  uuid_utoa(fd->inode->gfid), EC_COUNT(sources, ec->nodes),
                  EC_COUNT(healed_sinks, ec->nodes));
 
-    ret = ec_rebuild_data(frame, ec, fd, size[source], sources, healed_sinks, file_has_holes);
+    ret = ec_rebuild_data(frame, ec, fd, size[source], sources, healed_sinks,
+                          file_has_holes);
     if (ret < 0)
         goto out;

pranithk · 2025-06-03T06:51:54Z

xlators/cluster/ec/src/ec-heal.c

@@ -2127,6 +2225,10 @@ __ec_heal_trim_sinks(call_frame_t *frame, ec_t *ec, fd_t *fd,
    }
    trim_offset = size;
    ec_adjust_offset_up(ec, &trim_offset, _gf_true);
+    if (file_has_holes) {
+        ret = cluster_ftruncate(ec->xl_list, trim, ec->nodes, replies, output,
+                                frame, ec->xl, fd, 0, NULL);


Failures in this operation needs to be considered like in lines between 2234-2242

xlators/cluster/ec/src/ec-heal.c

gluster-ant · 2025-06-06T09:40:44Z

CLANG-FORMAT FAILURE:
Before merging the patch, this diff needs to be considered for passing clang-format

index 04378203a..01272719a 100644
--- a/xlators/cluster/ec/src/ec-heal.c
+++ b/xlators/cluster/ec/src/ec-heal.c
@@ -2205,7 +2205,7 @@ ec_rebuild_data(call_frame_t *frame, ec_t *ec, fd_t *fd, uint64_t size,
     syncbarrier_destroy(&heal->barrier);
     if (ret < 0)
         gf_msg_debug(ec->xl->name, -ret, "%s: heal failed",
-                    uuid_utoa(fd->inode->gfid));
+                     uuid_utoa(fd->inode->gfid));
     return ret;
 }
 
@@ -2522,7 +2522,8 @@ unlock:
                  uuid_utoa(fd->inode->gfid), EC_COUNT(sources, ec->nodes),
                  EC_COUNT(healed_sinks, ec->nodes));
 
-    ret = ec_rebuild_data(frame, ec, fd, size[source], sources, healed_sinks, file_has_holes);
+    ret = ec_rebuild_data(frame, ec, fd, size[source], sources, healed_sinks,
+                          file_has_holes);
     if (ret < 0)
         goto out;

morgoth6 · 2025-07-28T20:33:02Z

xlators/cluster/ec/src/ec-heal.c

+    syncbarrier_wait(&heal->barrier, 1);
+
+    if (heal->error != 0) {
+        if (heal->error = ENXIO) {


Typo! IMHO paying attention to compiler warnings makes sense :/

pranithk requested changes Jun 3, 2025

View reviewed changes

Resolved comments

2110d82

morgoth6 reviewed Jul 28, 2025

View reviewed changes

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Logic for Healing of Sparse Files #4554

Logic for Healing of Sparse Files #4554

Uh oh!

sbk173 commented Jun 2, 2025

Uh oh!

gluster-ant commented Jun 2, 2025

Uh oh!

gluster-ant commented Jun 2, 2025

Uh oh!

gluster-ant commented Jun 2, 2025

Uh oh!

pranithk Jun 3, 2025

Uh oh!

Uh oh!

Uh oh!

Uh oh!

gluster-ant commented Jun 6, 2025

Uh oh!

morgoth6 Jul 28, 2025

Uh oh!

Uh oh!

Logic for Healing of Sparse Files #4554

Are you sure you want to change the base?

Logic for Healing of Sparse Files #4554

Uh oh!

Conversation

sbk173 commented Jun 2, 2025

Uh oh!

gluster-ant commented Jun 2, 2025

Uh oh!

gluster-ant commented Jun 2, 2025

Uh oh!

gluster-ant commented Jun 2, 2025

Uh oh!

pranithk Jun 3, 2025

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

Uh oh!

gluster-ant commented Jun 6, 2025

Uh oh!

morgoth6 Jul 28, 2025

Choose a reason for hiding this comment

Uh oh!

Uh oh!