Skip to content

Commit 3067566

Browse files
committed
zpool: Allow lockless zpool status
Add a new `ZPOOL_LOCK_BEHAVIOR` envvar to control `zpool status` lock behavior. `ZPOOL_LOCK_BEHAVIOR` can have one of these values: "lockless": Try for a short amount of time to get the spa_namespace lock. If that doesn't work, then do the zpool status locklessly. This is dangerous and can crash your system if the pool configs are being modified while zpool status is running. This setting requires `zpool status` to be run as root. "trylock": Try for a short amount of time to get the spa_namespace lock. If that doesn't work then simply abort 'zpool status'. "wait": Wait forever for the lock. This is the default. These options allow users to view the zpool status when the pool gets stuck while holding the spa_namespace lock. Signed-off-by: Tony Hutter <[email protected]>
1 parent a6cca8a commit 3067566

File tree

27 files changed

+695
-64
lines changed

27 files changed

+695
-64
lines changed

cmd/zdb/zdb.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7723,7 +7723,8 @@ import_checkpointed_state(char *target, nvlist_t *cfg, char **new_path)
77237723

77247724
if (cfg == NULL) {
77257725
zdb_set_skip_mmp(poolname);
7726-
error = spa_get_stats(poolname, &cfg, NULL, 0);
7726+
error = spa_get_stats(poolname, &cfg, NULL, 0,
7727+
ZPOOL_LOCK_BEHAVIOR_DEFAULT);
77277728
if (error != 0) {
77287729
fatal("Tried to read config of pool \"%s\" but "
77297730
"spa_get_stats() failed with error %d\n",

cmd/zpool/zpool_iter.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ pool_list_get(int argc, char **argv, zprop_list_t **proplist, zfs_type_t type,
118118
boolean_t literal, int *err)
119119
{
120120
zpool_list_t *zlp;
121+
int rc;
121122

122123
zlp = safe_malloc(sizeof (zpool_list_t));
123124

@@ -137,7 +138,11 @@ pool_list_get(int argc, char **argv, zprop_list_t **proplist, zfs_type_t type,
137138
zlp->zl_literal = literal;
138139

139140
if (argc == 0) {
140-
(void) zpool_iter(g_zfs, add_pool, zlp);
141+
rc = zpool_iter(g_zfs, add_pool, zlp);
142+
if (rc != 0) {
143+
free(zlp);
144+
return (NULL);
145+
}
141146
zlp->zl_findall = B_TRUE;
142147
} else {
143148
int i;

cmd/zpool/zpool_main.c

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10979,6 +10979,32 @@ status_callback(zpool_handle_t *zhp, void *data)
1097910979
return (0);
1098010980
}
1098110981

10982+
/*
10983+
* Set the zpool status lock behavior based off of the ZPOOL_LOCK_BEHAVIOR
10984+
* envvar. If the var is not set, or an unknown value, then set the lock
10985+
* behavior to ZPOOL_LOCK_BEHAVIOR_DEFAULT.
10986+
*/
10987+
static void
10988+
zpool_set_lock_behavior(void)
10989+
{
10990+
char *str;
10991+
zpool_lock_behavior_t zpool_lock_behavior;
10992+
10993+
str = getenv("ZPOOL_LOCK_BEHAVIOR");
10994+
if (str == NULL)
10995+
zpool_lock_behavior = ZPOOL_LOCK_BEHAVIOR_DEFAULT;
10996+
else if (strcmp(str, "wait") == 0)
10997+
zpool_lock_behavior = ZPOOL_LOCK_BEHAVIOR_WAIT;
10998+
else if (strcmp(str, "trylock") == 0)
10999+
zpool_lock_behavior = ZPOOL_LOCK_BEHAVIOR_TRYLOCK;
11000+
else if (strcmp(str, "lockless") == 0)
11001+
zpool_lock_behavior = ZPOOL_LOCK_BEHAVIOR_LOCKLESS;
11002+
else
11003+
zpool_lock_behavior = ZPOOL_LOCK_BEHAVIOR_DEFAULT;
11004+
11005+
libzfs_set_lock_behavior(g_zfs, zpool_lock_behavior);
11006+
}
11007+
1098211008
/*
1098311009
* zpool status [-c [script1,script2,...]] [-dDegiLpPstvx] [--power] ...
1098411010
* [-T d|u] [pool] [interval [count]]
@@ -11152,6 +11178,8 @@ zpool_do_status(int argc, char **argv)
1115211178
usage(B_FALSE);
1115311179
}
1115411180

11181+
zpool_set_lock_behavior();
11182+
1115511183
for (;;) {
1115611184
if (cb.cb_json) {
1115711185
cb.cb_jsobj = zpool_json_schema(0, 1);

include/libzfs.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,8 @@ _LIBZFS_H int zpool_create(libzfs_handle_t *, const char *, nvlist_t *,
264264
nvlist_t *, nvlist_t *);
265265
_LIBZFS_H int zpool_destroy(zpool_handle_t *, const char *);
266266
_LIBZFS_H int zpool_add(zpool_handle_t *, nvlist_t *, boolean_t check_ashift);
267+
_LIBZFS_H void libzfs_set_lock_behavior(libzfs_handle_t *,
268+
zpool_lock_behavior_t);
267269

268270
typedef struct splitflags {
269271
/* do not split, but return the config that would be split off */

include/os/freebsd/spl/sys/mutex.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,4 +72,19 @@ typedef enum {
7272
#define mutex_owned(lock) sx_xlocked(lock)
7373
#define mutex_owner(lock) sx_xholder(lock)
7474

75+
/*
76+
* Poor-man's version of Linux kernel's down_timeout(). Try to acquire a mutex
77+
* for 'ns' number of nanoseconds. Returns 0 if mutex was acquired or ETIME
78+
* if timeout occurred.
79+
*/
80+
static inline int mutex_enter_timeout(kmutex_t *mutex, uint64_t ns)
81+
{
82+
hrtime_t end = gethrtime() + ns;
83+
while (gethrtime() < end) {
84+
if (mutex_tryenter(mutex))
85+
return (0); /* success */
86+
}
87+
return (ETIME);
88+
}
89+
7590
#endif /* _OPENSOLARIS_SYS_MUTEX_H_ */

include/os/linux/spl/sys/mutex.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#define _SPL_MUTEX_H
2727

2828
#include <sys/types.h>
29+
#include <sys/time.h>
2930
#include <linux/sched.h>
3031
#include <linux/mutex.h>
3132
#include <linux/lockdep.h>
@@ -187,4 +188,19 @@ spl_mutex_lockdep_on_maybe(kmutex_t *mp) \
187188
/* NOTE: do not dereference mp after this point */ \
188189
}
189190

191+
/*
192+
* Poor-man's version of Linux kernel's down_timeout(). Try to acquire a mutex
193+
* for 'ns' number of nanoseconds. Returns 0 if mutex was acquired or ETIME
194+
* if timeout occurred.
195+
*/
196+
static inline int mutex_enter_timeout(kmutex_t *mutex, uint64_t ns)
197+
{
198+
hrtime_t end = gethrtime() + ns;
199+
while (gethrtime() < end) {
200+
if (mutex_tryenter(mutex))
201+
return (0); /* success */
202+
}
203+
return (ETIME);
204+
}
205+
190206
#endif /* _SPL_MUTEX_H */

include/sys/fs/zfs.h

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -872,6 +872,9 @@ typedef struct zpool_load_policy {
872872
#define ZPOOL_CONFIG_REBUILD_STATS "org.openzfs:rebuild_stats"
873873
#define ZPOOL_CONFIG_COMPATIBILITY "compatibility"
874874

875+
/* ZFS_IOC_POOL_STATS argument for spa_namespace locking behavior */
876+
#define ZPOOL_CONFIG_LOCK_BEHAVIOR "lock_behavior" /* not stored on disk */
877+
875878
/*
876879
* The persistent vdev state is stored as separate values rather than a single
877880
* 'vdev_state' entry. This is because a device can be in multiple states, such
@@ -1984,6 +1987,30 @@ enum zio_encrypt {
19841987
ZFS_XA_NS_PREFIX_MATCH(LINUX_TRUSTED, name) || \
19851988
ZFS_XA_NS_PREFIX_MATCH(LINUX_USER, name))
19861989

1990+
/*
1991+
* Set locking behavior for zpool commands.
*
* Each value selects how the lock on the pool config is acquired.
1992+
*/
1993+
typedef enum {
1994+
/* Wait to acquire the lock on the zpool config */
1995+
ZPOOL_LOCK_BEHAVIOR_WAIT = 0,
1996+
/* The default behavior is an alias for ZPOOL_LOCK_BEHAVIOR_WAIT */
ZPOOL_LOCK_BEHAVIOR_DEFAULT = ZPOOL_LOCK_BEHAVIOR_WAIT,
1997+
/*
1998+
* Return an error if it's taking an unnecessarily long time to
1999+
* acquire the lock on the pool config (default 100ms)
2000+
*/
2001+
ZPOOL_LOCK_BEHAVIOR_TRYLOCK = 1,
2002+
2003+
/*
2004+
* DANGER: THIS CAN CRASH YOUR SYSTEM
2005+
*
2006+
* If you can't acquire the pool config lock after 100ms then do
2007+
* a lockless lookup. This should only be done in emergencies, as it
2008+
* can crash the kernel module!
2009+
*/
2010+
ZPOOL_LOCK_BEHAVIOR_LOCKLESS = 2,
2011+
ZPOOL_LOCK_BEHAVIOR_END = 3 /* last entry marker; not a selectable behavior */
2012+
} zpool_lock_behavior_t;
2013+
19872014
#ifdef __cplusplus
19882015
}
19892016
#endif

include/sys/spa.h

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -742,10 +742,13 @@ typedef enum trim_type {
742742

743743
/* state manipulation functions */
744744
extern int spa_open(const char *pool, spa_t **, const void *tag);
745+
extern int spa_open_common_lock_behavior(const char *pool, spa_t **spapp,
746+
const void *tag, nvlist_t *nvpolicy, nvlist_t **config,
747+
zpool_lock_behavior_t zpool_lock_behavior);
745748
extern int spa_open_rewind(const char *pool, spa_t **, const void *tag,
746749
nvlist_t *policy, nvlist_t **config);
747750
extern int spa_get_stats(const char *pool, nvlist_t **config, char *altroot,
748-
size_t buflen);
751+
size_t buflen, zpool_lock_behavior_t zpool_lock_behavior);
749752
extern int spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
750753
nvlist_t *zplprops, struct dsl_crypto_params *dcp);
751754
extern int spa_import(char *pool, nvlist_t *config, nvlist_t *props,
@@ -851,10 +854,13 @@ extern kcondvar_t spa_namespace_cv;
851854

852855
extern void spa_write_cachefile(spa_t *, boolean_t, boolean_t, boolean_t);
853856
extern void spa_config_load(void);
854-
extern int spa_all_configs(uint64_t *generation, nvlist_t **pools);
857+
extern int spa_all_configs(uint64_t *generation, nvlist_t **pools,
858+
zpool_lock_behavior_t zpool_lock_behavior);
855859
extern void spa_config_set(spa_t *spa, nvlist_t *config);
856860
extern nvlist_t *spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg,
857861
int getstats);
862+
extern nvlist_t *spa_config_generate_lock_behavior(spa_t *spa, vdev_t *vd,
863+
uint64_t txg, int getstats, zpool_lock_behavior_t zpool_lock_behavior);
858864
extern void spa_config_update(spa_t *spa, int what);
859865
extern int spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv,
860866
vdev_t *parent, uint_t id, int atype);
@@ -866,9 +872,11 @@ extern int spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv,
866872

867873
/* Namespace manipulation */
868874
extern spa_t *spa_lookup(const char *name);
875+
extern spa_t *spa_lookup_lockless(const char *name);
869876
extern spa_t *spa_add(const char *name, nvlist_t *config, const char *altroot);
870877
extern void spa_remove(spa_t *spa);
871878
extern spa_t *spa_next(spa_t *prev);
879+
extern spa_t *spa_next_lockless(spa_t *prev);
872880

873881
/* Refcount functions */
874882
extern void spa_open_ref(spa_t *spa, const void *tag);

include/sys/spa_impl.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -491,6 +491,7 @@ extern void spa_set_deadman_synctime(hrtime_t ns);
491491
extern void spa_set_deadman_ziotime(hrtime_t ns);
492492
extern const char *spa_history_zone(void);
493493
extern const char *zfs_active_allocator;
494+
extern unsigned int spa_namespace_trylock_ms;
494495
extern int param_set_active_allocator_common(const char *val);
495496

496497
#ifdef __cplusplus

include/sys/zfs_context.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,7 @@ extern void mutex_enter(kmutex_t *mp);
277277
extern int mutex_enter_check_return(kmutex_t *mp);
278278
extern void mutex_exit(kmutex_t *mp);
279279
extern int mutex_tryenter(kmutex_t *mp);
280+
extern int mutex_enter_timeout(kmutex_t *mp, uint64_t ns);
280281

281282
#define NESTED_SINGLE 1
282283
#define mutex_enter_nested(mp, class) mutex_enter(mp)

0 commit comments

Comments
 (0)