Skip to content

Commit 24c534b

Browse files
committed
Merge tag 'ceph-for-4.11-rc2' of git://github.com/ceph/ceph-client
Pull ceph fixes from Ilya Dryomov:

 - a fix for the recently discovered misdirected requests bug present in jewel and later on the server side and all stable kernels
 - a fixup for -rc1 CRUSH changes
 - two usability enhancements: osd_request_timeout option and supported_features bus attribute.

* tag 'ceph-for-4.11-rc2' of git://github.com/ceph/ceph-client:
  libceph: osd_request_timeout option
  rbd: supported_features bus attribute
  libceph: don't set weight to IN when OSD is destroyed
  libceph: fix crush_decode() for older maps
2 parents 2baf380 + 7cc5e38 commit 24c534b

File tree

6 files changed

+66
-8
lines changed

6 files changed

+66
-8
lines changed

drivers/block/rbd.c

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -120,10 +120,11 @@ static int atomic_dec_return_safe(atomic_t *v)
120120

121121
/* Feature bits */
122122

123-
#define RBD_FEATURE_LAYERING (1<<0)
124-
#define RBD_FEATURE_STRIPINGV2 (1<<1)
125-
#define RBD_FEATURE_EXCLUSIVE_LOCK (1<<2)
126-
#define RBD_FEATURE_DATA_POOL (1<<7)
123+
#define RBD_FEATURE_LAYERING (1ULL<<0)
124+
#define RBD_FEATURE_STRIPINGV2 (1ULL<<1)
125+
#define RBD_FEATURE_EXCLUSIVE_LOCK (1ULL<<2)
126+
#define RBD_FEATURE_DATA_POOL (1ULL<<7)
127+
127128
#define RBD_FEATURES_ALL (RBD_FEATURE_LAYERING | \
128129
RBD_FEATURE_STRIPINGV2 | \
129130
RBD_FEATURE_EXCLUSIVE_LOCK | \
@@ -499,16 +500,23 @@ static bool rbd_is_lock_owner(struct rbd_device *rbd_dev)
499500
return is_lock_owner;
500501
}
501502

503+
static ssize_t rbd_supported_features_show(struct bus_type *bus, char *buf)
504+
{
505+
return sprintf(buf, "0x%llx\n", RBD_FEATURES_SUPPORTED);
506+
}
507+
502508
static BUS_ATTR(add, S_IWUSR, NULL, rbd_add);
503509
static BUS_ATTR(remove, S_IWUSR, NULL, rbd_remove);
504510
static BUS_ATTR(add_single_major, S_IWUSR, NULL, rbd_add_single_major);
505511
static BUS_ATTR(remove_single_major, S_IWUSR, NULL, rbd_remove_single_major);
512+
static BUS_ATTR(supported_features, S_IRUGO, rbd_supported_features_show, NULL);
506513

507514
static struct attribute *rbd_bus_attrs[] = {
508515
&bus_attr_add.attr,
509516
&bus_attr_remove.attr,
510517
&bus_attr_add_single_major.attr,
511518
&bus_attr_remove_single_major.attr,
519+
&bus_attr_supported_features.attr,
512520
NULL,
513521
};
514522

include/linux/ceph/libceph.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ struct ceph_options {
4848
unsigned long mount_timeout; /* jiffies */
4949
unsigned long osd_idle_ttl; /* jiffies */
5050
unsigned long osd_keepalive_timeout; /* jiffies */
51+
unsigned long osd_request_timeout; /* jiffies */
5152

5253
/*
5354
* any type that can't be simply compared or doesn't need need
@@ -68,6 +69,7 @@ struct ceph_options {
6869
#define CEPH_MOUNT_TIMEOUT_DEFAULT msecs_to_jiffies(60 * 1000)
6970
#define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000)
7071
#define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000)
72+
#define CEPH_OSD_REQUEST_TIMEOUT_DEFAULT 0 /* no timeout */
7173

7274
#define CEPH_MONC_HUNT_INTERVAL msecs_to_jiffies(3 * 1000)
7375
#define CEPH_MONC_PING_INTERVAL msecs_to_jiffies(10 * 1000)

include/linux/ceph/osd_client.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,7 @@ struct ceph_osd_request {
189189

190190
/* internal */
191191
unsigned long r_stamp; /* jiffies, send or check time */
192+
unsigned long r_start_stamp; /* jiffies */
192193
int r_attempts;
193194
struct ceph_eversion r_replay_version; /* aka reassert_version */
194195
u32 r_last_force_resend;

net/ceph/ceph_common.c

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,7 @@ enum {
230230
Opt_osdkeepalivetimeout,
231231
Opt_mount_timeout,
232232
Opt_osd_idle_ttl,
233+
Opt_osd_request_timeout,
233234
Opt_last_int,
234235
/* int args above */
235236
Opt_fsid,
@@ -256,6 +257,7 @@ static match_table_t opt_tokens = {
256257
{Opt_osdkeepalivetimeout, "osdkeepalive=%d"},
257258
{Opt_mount_timeout, "mount_timeout=%d"},
258259
{Opt_osd_idle_ttl, "osd_idle_ttl=%d"},
260+
{Opt_osd_request_timeout, "osd_request_timeout=%d"},
259261
/* int args above */
260262
{Opt_fsid, "fsid=%s"},
261263
{Opt_name, "name=%s"},
@@ -361,6 +363,7 @@ ceph_parse_options(char *options, const char *dev_name,
361363
opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT;
362364
opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT;
363365
opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;
366+
opt->osd_request_timeout = CEPH_OSD_REQUEST_TIMEOUT_DEFAULT;
364367

365368
/* get mon ip(s) */
366369
/* ip1[:port1][,ip2[:port2]...] */
@@ -473,6 +476,15 @@ ceph_parse_options(char *options, const char *dev_name,
473476
}
474477
opt->mount_timeout = msecs_to_jiffies(intval * 1000);
475478
break;
479+
case Opt_osd_request_timeout:
480+
/* 0 is "wait forever" (i.e. infinite timeout) */
481+
if (intval < 0 || intval > INT_MAX / 1000) {
482+
pr_err("osd_request_timeout out of range\n");
483+
err = -EINVAL;
484+
goto out;
485+
}
486+
opt->osd_request_timeout = msecs_to_jiffies(intval * 1000);
487+
break;
476488

477489
case Opt_share:
478490
opt->flags &= ~CEPH_OPT_NOSHARE;
@@ -557,6 +569,9 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client)
557569
if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
558570
seq_printf(m, "osdkeepalivetimeout=%d,",
559571
jiffies_to_msecs(opt->osd_keepalive_timeout) / 1000);
572+
if (opt->osd_request_timeout != CEPH_OSD_REQUEST_TIMEOUT_DEFAULT)
573+
seq_printf(m, "osd_request_timeout=%d,",
574+
jiffies_to_msecs(opt->osd_request_timeout) / 1000);
560575

561576
/* drop redundant comma */
562577
if (m->count != pos)

net/ceph/osd_client.c

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1709,6 +1709,8 @@ static void account_request(struct ceph_osd_request *req)
17091709

17101710
req->r_flags |= CEPH_OSD_FLAG_ONDISK;
17111711
atomic_inc(&req->r_osdc->num_requests);
1712+
1713+
req->r_start_stamp = jiffies;
17121714
}
17131715

17141716
static void submit_request(struct ceph_osd_request *req, bool wrlocked)
@@ -1789,6 +1791,14 @@ static void cancel_request(struct ceph_osd_request *req)
17891791
ceph_osdc_put_request(req);
17901792
}
17911793

1794+
static void abort_request(struct ceph_osd_request *req, int err)
1795+
{
1796+
dout("%s req %p tid %llu err %d\n", __func__, req, req->r_tid, err);
1797+
1798+
cancel_map_check(req);
1799+
complete_request(req, err);
1800+
}
1801+
17921802
static void check_pool_dne(struct ceph_osd_request *req)
17931803
{
17941804
struct ceph_osd_client *osdc = req->r_osdc;
@@ -2487,6 +2497,7 @@ static void handle_timeout(struct work_struct *work)
24872497
container_of(work, struct ceph_osd_client, timeout_work.work);
24882498
struct ceph_options *opts = osdc->client->options;
24892499
unsigned long cutoff = jiffies - opts->osd_keepalive_timeout;
2500+
unsigned long expiry_cutoff = jiffies - opts->osd_request_timeout;
24902501
LIST_HEAD(slow_osds);
24912502
struct rb_node *n, *p;
24922503

@@ -2502,15 +2513,23 @@ static void handle_timeout(struct work_struct *work)
25022513
struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node);
25032514
bool found = false;
25042515

2505-
for (p = rb_first(&osd->o_requests); p; p = rb_next(p)) {
2516+
for (p = rb_first(&osd->o_requests); p; ) {
25062517
struct ceph_osd_request *req =
25072518
rb_entry(p, struct ceph_osd_request, r_node);
25082519

2520+
p = rb_next(p); /* abort_request() */
2521+
25092522
if (time_before(req->r_stamp, cutoff)) {
25102523
dout(" req %p tid %llu on osd%d is laggy\n",
25112524
req, req->r_tid, osd->o_osd);
25122525
found = true;
25132526
}
2527+
if (opts->osd_request_timeout &&
2528+
time_before(req->r_start_stamp, expiry_cutoff)) {
2529+
pr_err_ratelimited("tid %llu on osd%d timeout\n",
2530+
req->r_tid, osd->o_osd);
2531+
abort_request(req, -ETIMEDOUT);
2532+
}
25142533
}
25152534
for (p = rb_first(&osd->o_linger_requests); p; p = rb_next(p)) {
25162535
struct ceph_osd_linger_request *lreq =
@@ -2530,6 +2549,21 @@ static void handle_timeout(struct work_struct *work)
25302549
list_move_tail(&osd->o_keepalive_item, &slow_osds);
25312550
}
25322551

2552+
if (opts->osd_request_timeout) {
2553+
for (p = rb_first(&osdc->homeless_osd.o_requests); p; ) {
2554+
struct ceph_osd_request *req =
2555+
rb_entry(p, struct ceph_osd_request, r_node);
2556+
2557+
p = rb_next(p); /* abort_request() */
2558+
2559+
if (time_before(req->r_start_stamp, expiry_cutoff)) {
2560+
pr_err_ratelimited("tid %llu on osd%d timeout\n",
2561+
req->r_tid, osdc->homeless_osd.o_osd);
2562+
abort_request(req, -ETIMEDOUT);
2563+
}
2564+
}
2565+
}
2566+
25332567
if (atomic_read(&osdc->num_homeless) || !list_empty(&slow_osds))
25342568
maybe_request_map(osdc);
25352569

net/ceph/osdmap.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -390,9 +390,8 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
390390
dout("crush decode tunable chooseleaf_stable = %d\n",
391391
c->chooseleaf_stable);
392392

393-
crush_finalize(c);
394-
395393
done:
394+
crush_finalize(c);
396395
dout("crush_decode success\n");
397396
return c;
398397

@@ -1380,7 +1379,6 @@ static int decode_new_up_state_weight(void **p, void *end,
13801379
if ((map->osd_state[osd] & CEPH_OSD_EXISTS) &&
13811380
(xorstate & CEPH_OSD_EXISTS)) {
13821381
pr_info("osd%d does not exist\n", osd);
1383-
map->osd_weight[osd] = CEPH_OSD_IN;
13841382
ret = set_primary_affinity(map, osd,
13851383
CEPH_OSD_DEFAULT_PRIMARY_AFFINITY);
13861384
if (ret)

0 commit comments

Comments
 (0)