-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Logic for Healing of Sparse Files #4554
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: devel
Are you sure you want to change the base?
Conversation
Problem: Sparse files were not handled in the healing logic, causing them to occupy their full size after healing. Fix: Similar to how sparse files are handled in dht-rebalance, holes and data regions are found using ec_seek() and only the data segments are written to the healed file. Fixes: gluster#3546 Signed-off-by: sbk173 <[email protected]>
Can one of the admins verify this patch? |
1 similar comment
Can one of the admins verify this patch? |
CLANG-FORMAT FAILURE: index 7daa05e84..d5982fa6d 100644
--- a/xlators/cluster/ec/src/ec-heal.c
+++ b/xlators/cluster/ec/src/ec-heal.c
@@ -2046,7 +2046,8 @@ ec_sync_heal_block(call_frame_t *frame, xlator_t *this, ec_heal_t *heal)
void
ec_heal_seek_hole_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, off_t offset, dict_t *xdata)
+ int32_t op_ret, int32_t op_errno, off_t offset,
+ dict_t *xdata)
{
ec_fop_data_t *fop = cookie;
ec_heal_t *heal = fop->data;
@@ -2063,12 +2064,13 @@ out:
void
ec_heal_seek_data_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, off_t offset, dict_t * xdata)
+ int32_t op_ret, int32_t op_errno, off_t offset,
+ dict_t *xdata)
{
ec_fop_data_t *fop = cookie;
ec_heal_t *heal = fop->data;
- if (op_ret < 0){
+ if (op_ret < 0) {
heal->done = _gf_true;
goto out;
}
@@ -2082,36 +2084,33 @@ int32_t
ec_sync_heal_sparse_region(call_frame_t *frame, ec_t *ec, ec_heal_t *heal)
{
int ret = 0;
- ec_seek(frame, ec->xl, heal->good, EC_MINIMUM_ONE,
- ec_heal_seek_data_cbk, heal, heal->fd, heal->offset,
- GF_SEEK_DATA, NULL);
+ ec_seek(frame, ec->xl, heal->good, EC_MINIMUM_ONE, ec_heal_seek_data_cbk,
+ heal, heal->fd, heal->offset, GF_SEEK_DATA, NULL);
syncbarrier_wait(&heal->barrier, 1);
if (heal->done)
goto out;
- ec_seek(frame, ec->xl, heal->good, EC_MINIMUM_ONE,
- ec_heal_seek_hole_cbk, heal, heal->fd, heal->offset,
- GF_SEEK_HOLE, NULL);
- syncbarrier_wait(&heal->barrier,1);
-
+ ec_seek(frame, ec->xl, heal->good, EC_MINIMUM_ONE, ec_heal_seek_hole_cbk,
+ heal, heal->fd, heal->offset, GF_SEEK_HOLE, NULL);
+ syncbarrier_wait(&heal->barrier, 1);
+
if (heal->error != 0) {
ret = -heal->error;
goto out;
}
for (; (heal->offset < heal->hole_offset) && (!heal->done);
- heal->offset += heal->size) {
-
+ heal->offset += heal->size) {
uint64_t data_block_size = heal->hole_offset - heal->offset;
- data_block_size = (data_block_size > ec->stripe_size)?
- data_block_size : ec->stripe_size;
+ data_block_size = (data_block_size > ec->stripe_size) ? data_block_size
+ : ec->stripe_size;
uint64_t original_heal_size = heal->size;
- if(data_block_size < heal->size)
+ if (data_block_size < heal->size)
heal->size = data_block_size;
-
+
ret = ec_sync_heal_block(frame, ec->xl, heal);
if (ret < 0)
goto out;
@@ -2122,12 +2121,12 @@ ec_sync_heal_sparse_region(call_frame_t *frame, ec_t *ec, ec_heal_t *heal)
out:
return ret;
-
}
int
ec_rebuild_data(call_frame_t *frame, ec_t *ec, fd_t *fd, uint64_t size,
- unsigned char *sources, unsigned char *healed_sinks, int hole_exists)
+ unsigned char *sources, unsigned char *healed_sinks,
+ int hole_exists)
{
ec_heal_t obj, *heal = &obj;
int ret = 0;
@@ -2154,45 +2153,43 @@ ec_rebuild_data(call_frame_t *frame, ec_t *ec, fd_t *fd, uint64_t size,
if (!hole_exists) {
for (heal->offset = 0; (heal->offset < size) && !heal->done;
- heal->offset += heal->size) {
+ heal->offset += heal->size) {
/* We immediately abort any heal if a shutdown request has been
- * received to avoid delays. The healing of this file will be
- * restarted by another SHD or other client that accesses the
- * file. */
+ * received to avoid delays. The healing of this file will be
+ * restarted by another SHD or other client that accesses the
+ * file. */
if (ec->shutdown) {
gf_msg_debug(ec->xl->name, 0,
- "Cancelling heal because "
- "EC is stopping.");
+ "Cancelling heal because "
+ "EC is stopping.");
ret = -ENOTCONN;
break;
}
- gf_msg_debug(ec->xl->name, 0,
- "%s: sources: %d, sinks: "
- "%d, offset: %" PRIu64 " bsize: %" PRIu64,
- uuid_utoa(fd->inode->gfid), EC_COUNT(sources, ec->nodes),
- EC_COUNT(healed_sinks, ec->nodes), heal->offset,
- heal->size);
+ gf_msg_debug(
+ ec->xl->name, 0,
+ "%s: sources: %d, sinks: "
+ "%d, offset: %" PRIu64 " bsize: %" PRIu64,
+ uuid_utoa(fd->inode->gfid), EC_COUNT(sources, ec->nodes),
+ EC_COUNT(healed_sinks, ec->nodes), heal->offset, heal->size);
ret = ec_sync_heal_block(frame, ec->xl, heal);
if (ret < 0)
break;
}
-
+
} else {
heal->offset = 0;
while (!heal->done) {
if (ec->shutdown) {
gf_msg_debug(ec->xl->name, 0,
- "Cancelling heal because "
- "EC is stopping.");
+ "Cancelling heal because "
+ "EC is stopping.");
ret = -ENOTCONN;
-
}
ret = ec_sync_heal_sparse_region(frame, ec, heal);
if (ret < 0)
break;
}
-
}
memset(healed_sinks, 0, ec->nodes);
ec_mask_to_char_array(heal->bad, healed_sinks, ec->nodes);
@@ -2201,7 +2198,7 @@ ec_rebuild_data(call_frame_t *frame, ec_t *ec, fd_t *fd, uint64_t size,
syncbarrier_destroy(&heal->barrier);
if (ret < 0)
gf_msg_debug(ec->xl->name, -ret, "%s: heal failed",
- uuid_utoa(fd->inode->gfid));
+ uuid_utoa(fd->inode->gfid));
return ret;
}
@@ -2475,12 +2472,14 @@ __ec_heal_data(call_frame_t *frame, ec_t *ec, fd_t *fd, unsigned char *heal_on,
}
ret = __ec_heal_data_prepare(frame, ec, fd, locked_on, versions, dirty,
- size, sources, healed_sinks, trim, &source_buf);
+ size, sources, healed_sinks, trim,
+ &source_buf);
if (ret < 0)
goto unlock;
-
- if (source_buf.ia_blocks * source_buf.ia_blksize != source_buf.ia_size) {
- file_has_holes = 1;
+
+ if (source_buf.ia_blocks * source_buf.ia_blksize !=
+ source_buf.ia_size) {
+ file_has_holes = 1;
}
if (EC_COUNT(healed_sinks, ec->nodes) == 0) {
@@ -2512,7 +2511,8 @@ unlock:
uuid_utoa(fd->inode->gfid), EC_COUNT(sources, ec->nodes),
EC_COUNT(healed_sinks, ec->nodes));
- ret = ec_rebuild_data(frame, ec, fd, size[source], sources, healed_sinks, file_has_holes);
+ ret = ec_rebuild_data(frame, ec, fd, size[source], sources, healed_sinks,
+ file_has_holes);
if (ret < 0)
goto out;
|
@@ -2127,6 +2225,10 @@ __ec_heal_trim_sinks(call_frame_t *frame, ec_t *ec, fd_t *fd, | |||
} | |||
trim_offset = size; | |||
ec_adjust_offset_up(ec, &trim_offset, _gf_true); | |||
if (file_has_holes) { | |||
ret = cluster_ftruncate(ec->xl_list, trim, ec->nodes, replies, output, | |||
frame, ec->xl, fd, 0, NULL); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Failures in this operation needs to be considered like in lines between 2234-2242
CLANG-FORMAT FAILURE: index 04378203a..01272719a 100644
--- a/xlators/cluster/ec/src/ec-heal.c
+++ b/xlators/cluster/ec/src/ec-heal.c
@@ -2205,7 +2205,7 @@ ec_rebuild_data(call_frame_t *frame, ec_t *ec, fd_t *fd, uint64_t size,
syncbarrier_destroy(&heal->barrier);
if (ret < 0)
gf_msg_debug(ec->xl->name, -ret, "%s: heal failed",
- uuid_utoa(fd->inode->gfid));
+ uuid_utoa(fd->inode->gfid));
return ret;
}
@@ -2522,7 +2522,8 @@ unlock:
uuid_utoa(fd->inode->gfid), EC_COUNT(sources, ec->nodes),
EC_COUNT(healed_sinks, ec->nodes));
- ret = ec_rebuild_data(frame, ec, fd, size[source], sources, healed_sinks, file_has_holes);
+ ret = ec_rebuild_data(frame, ec, fd, size[source], sources, healed_sinks,
+ file_has_holes);
if (ret < 0)
goto out;
|
syncbarrier_wait(&heal->barrier, 1); | ||
|
||
if (heal->error != 0) { | ||
if (heal->error = ENXIO) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Typo! IMHO paying attention to compiler warnings makes sense :/
Problem:
Sparse files were not handled in the healing logic, causing them to occupy their full size after healing.
Fix:
Similar to how sparse files are handled in dht-rebalance, holes and data regions are found using ec_seek() and only the data segments are written to the healed file.
Fixes: #3546