510 lines
21 KiB
Diff
510 lines
21 KiB
Diff
From 212aa5e819ced67e152a4efc43e0fab38a3f60a0 Mon Sep 17 00:00:00 2001
|
|
From: Franck Bui <fbui@suse.com>
|
|
Date: Wed, 3 Mar 2021 17:34:39 +0100
|
|
Subject: [PATCH 1001/1001] udev: use lock when selecting the highest priority
|
|
devlink
|
|
|
|
Commit 30f6dce62cb3a738b20253f2192270607c31b55b introduced a lock-less
|
|
algorithm, which is a kind of spinlock in userspace spinning on a heavy loop
|
|
(!?!), that is supposed to fix the race that might happen when multiple
|
|
workers/devices claim for the same symlink.
|
|
|
|
But the algorithm is boggus as every worker will prefer itself and will try to
|
|
"steal" the link from another worker with the same priority.
|
|
|
|
Rather than trying to fix the algorithm and close all possible races, let's use
|
|
a much simplest approach which consists in using a lock. This was originally
|
|
implemented by Martin Wilck: https://github.com/systemd/systemd/pull/8667 but
|
|
somehow upstream preferred the more complex lock-less algo without any proof of
|
|
benefits (quite the opposite since there're only drawback AFAICS).
|
|
|
|
In fact the figures we collected so far tends to show that the lock approach is
|
|
even faster than the racy version without any fix.
|
|
|
|
This patch basically drop the lock-less algo introduced by commit
|
|
30f6dce62cb3a738b20253f2192270607c31b55b and reuses Martin's lock approach with
|
|
some minor improvements.
|
|
|
|
Compare to the old implementation (before commit 30f6dce62cb3a738b20253f21), it
|
|
changes the moment when the symlink are created: it's now done *after* udev DB
|
|
has been updated, hence removing the risk for another worker to overwrite a
|
|
just created symlink with a higher priority which has not yet been registed in
|
|
the DB. Latest versions of upstream calls update_devnode() twice, see
|
|
https://github.com/systemd/systemd/pull/19560#issuecomment-836693914 for the
|
|
"rationale".
|
|
|
|
Update v2:
|
|
----------
|
|
v249.13 includes a bunch of reworks of this lock-less algorithm, which confirm
|
|
how this design was a bad idea (upstream eventually got rid of it and replaced
|
|
it with the lock approach since v252). This patch was rebased to drop these
|
|
changes as they're useless when the link directories are protected with locks.
|
|
|
|
v249.13 also introduced a new format for the link directory entries. I'm not
|
|
sure how relevant such changes are for a stable release but it increases the
|
|
risk of regression significantly. So this part was also dropped.
|
|
|
|
[fbui: fixes bsc#1181192]
|
|
|
|
Update v3:
|
|
---------
|
|
|
|
Optimize the case where hundred workers claim the same symlink with the same
|
|
priority.
|
|
|
|
Each time a new device claimed a symlink, the worker searched for the device
|
|
claiming the symlink with the highest prio by examining the prio of each device
|
|
(including the new one) and recreate the symlink accordingly. However this can
|
|
be improved by stopping the search as soon as it encounters a device with the
|
|
same prio since in this case it can assume that the symlink is currently owned
|
|
by a device with equal or greater prio.
|
|
|
|
[fbui: fixes bsc#1203141]
|
|
---
|
|
src/udev/udev-event.c | 9 +-
|
|
src/udev/udev-node.c | 321 ++++++++++--------------------------------
|
|
2 files changed, 82 insertions(+), 248 deletions(-)
|
|
|
|
diff --git a/src/udev/udev-event.c b/src/udev/udev-event.c
|
|
index 2661ed7933..173cd37618 100644
|
|
--- a/src/udev/udev-event.c
|
|
+++ b/src/udev/udev-event.c
|
|
@@ -1051,10 +1051,6 @@ int udev_event_execute_rules(
|
|
if (r < 0)
|
|
return r;
|
|
|
|
- r = update_devnode(event);
|
|
- if (r < 0)
|
|
- return r;
|
|
-
|
|
/* preserve old, or get new initialization timestamp */
|
|
r = device_ensure_usec_initialized(dev, event->dev_db_clone);
|
|
if (r < 0)
|
|
@@ -1069,8 +1065,11 @@ int udev_event_execute_rules(
|
|
if (r < 0)
|
|
return log_device_debug_errno(dev, r, "Failed to update database under /run/udev/data/: %m");
|
|
|
|
- device_set_is_initialized(dev);
|
|
+ r = update_devnode(event);
|
|
+ if (r < 0)
|
|
+ return r;
|
|
|
|
+ device_set_is_initialized(dev);
|
|
return 0;
|
|
}
|
|
|
|
diff --git a/src/udev/udev-node.c b/src/udev/udev-node.c
|
|
index d9309efa25..7be933a90f 100644
|
|
--- a/src/udev/udev-node.c
|
|
+++ b/src/udev/udev-node.c
|
|
@@ -17,26 +17,19 @@
|
|
#include "format-util.h"
|
|
#include "fs-util.h"
|
|
#include "hexdecoct.h"
|
|
+#include "lockfile-util.h"
|
|
#include "mkdir.h"
|
|
-#include "parse-util.h"
|
|
#include "path-util.h"
|
|
-#include "random-util.h"
|
|
#include "selinux-util.h"
|
|
#include "smack-util.h"
|
|
#include "stat-util.h"
|
|
#include "stdio-util.h"
|
|
#include "string-util.h"
|
|
#include "strxcpyx.h"
|
|
-#include "time-util.h"
|
|
#include "udev-node.h"
|
|
#include "user-util.h"
|
|
|
|
#define CREATE_LINK_MAX_RETRIES 128
|
|
-#define LINK_UPDATE_MAX_RETRIES 128
|
|
-#define CREATE_STACK_LINK_MAX_RETRIES 128
|
|
-#define UPDATE_TIMESTAMP_MAX_RETRIES 128
|
|
-#define MAX_RANDOM_DELAY (250 * USEC_PER_MSEC)
|
|
-#define MIN_RANDOM_DELAY ( 50 * USEC_PER_MSEC)
|
|
#define UDEV_NODE_HASH_KEY SD_ID128_MAKE(b9,6a,f1,ce,40,31,44,1a,9e,19,ec,8b,ae,f3,e3,2f)
|
|
|
|
static int create_symlink(const char *target, const char *slink) {
|
|
@@ -122,8 +115,14 @@ static int link_find_prioritized(sd_device *dev, bool add, const char *stackdir,
|
|
assert(stackdir);
|
|
assert(ret);
|
|
|
|
- /* Find device node of device with highest priority. This returns 1 if a device found, 0 if no
|
|
- * device found, or a negative errno. */
|
|
+ /* When 'add' is true, search for a device having the same or a higher prio. If such a device is
|
|
+ * found, return 0 and 'ret' is unchanged. Otherwise return 1 and 'ret' contains the device node of
|
|
+ * the passed device.
|
|
+ *
|
|
+ * When 'add' is false, find device node of device with highest priority. This returns 1 if a device
|
|
+ * found 0 if no device found.
|
|
+ *
|
|
+ * In both cases it returns a negative errno in case of errors. */
|
|
|
|
if (add) {
|
|
const char *devnode;
|
|
@@ -139,6 +138,8 @@ static int link_find_prioritized(sd_device *dev, bool add, const char *stackdir,
|
|
target = strdup(devnode);
|
|
if (!target)
|
|
return -ENOMEM;
|
|
+
|
|
+ log_device_debug(dev, "Attempting to claim '%s' with priority %i", stackdir, priority);
|
|
}
|
|
|
|
dir = opendir(stackdir);
|
|
@@ -157,67 +158,50 @@ static int link_find_prioritized(sd_device *dev, bool add, const char *stackdir,
|
|
return r;
|
|
|
|
FOREACH_DIRENT_ALL(dent, dir, break) {
|
|
- _cleanup_free_ char *path = NULL, *buf = NULL;
|
|
- int tmp_prio;
|
|
+ _cleanup_(sd_device_unrefp) sd_device *dev_db = NULL;
|
|
+ const char *devnode;
|
|
+ int db_prio = 0;
|
|
|
|
+ if (dent->d_name[0] == '\0')
|
|
+ break;
|
|
if (dent->d_name[0] == '.')
|
|
continue;
|
|
|
|
- /* skip ourself */
|
|
+ /* did we find ourself? */
|
|
if (streq(dent->d_name, id))
|
|
continue;
|
|
|
|
- path = path_join(stackdir, dent->d_name);
|
|
- if (!path)
|
|
- return -ENOMEM;
|
|
-
|
|
- if (readlink_malloc(path, &buf) >= 0) {
|
|
- char *devnode;
|
|
-
|
|
- /* New format. The devnode and priority can be obtained from symlink. */
|
|
-
|
|
- devnode = strchr(buf, ':');
|
|
- if (!devnode || devnode == buf)
|
|
- continue;
|
|
+ log_device_debug(dev, "Found '%s' claiming '%s'", dent->d_name, stackdir);
|
|
|
|
- *(devnode++) = '\0';
|
|
- if (!path_startswith(devnode, "/dev"))
|
|
- continue;
|
|
-
|
|
- if (safe_atoi(buf, &tmp_prio) < 0)
|
|
- continue;
|
|
-
|
|
- if (target && tmp_prio <= priority)
|
|
- continue;
|
|
-
|
|
- r = free_and_strdup(&target, devnode);
|
|
- if (r < 0)
|
|
- return r;
|
|
- } else {
|
|
- _cleanup_(sd_device_unrefp) sd_device *tmp_dev = NULL;
|
|
- const char *devnode;
|
|
+ if (sd_device_new_from_device_id(&dev_db, dent->d_name) < 0)
|
|
+ continue;
|
|
|
|
- /* Old format. The devnode and priority must be obtained from uevent and
|
|
- * udev database files. */
|
|
+ if (sd_device_get_devname(dev_db, &devnode) < 0)
|
|
+ continue;
|
|
|
|
- if (sd_device_new_from_device_id(&tmp_dev, dent->d_name) < 0)
|
|
- continue;
|
|
+ if (device_get_devlink_priority(dev_db, &db_prio) < 0)
|
|
+ continue;
|
|
|
|
- if (device_get_devlink_priority(tmp_dev, &tmp_prio) < 0)
|
|
- continue;
|
|
+ if (target && db_prio < priority)
|
|
+ continue;
|
|
|
|
- if (target && tmp_prio <= priority)
|
|
- continue;
|
|
+ if (add) {
|
|
+ assert(target);
|
|
+ /* Exit early as we found an existing device claiming the symlink with same or higher
|
|
+ * prio. */
|
|
+ log_device_debug(dev, "Giving up '%s' in favor of %s (prio=%i)", stackdir, dent->d_name, db_prio);
|
|
+ return 0;
|
|
+ }
|
|
|
|
- if (sd_device_get_devname(tmp_dev, &devnode) < 0)
|
|
- continue;
|
|
+ if (target && db_prio == priority)
|
|
+ continue;
|
|
|
|
- r = free_and_strdup(&target, devnode);
|
|
- if (r < 0)
|
|
- return r;
|
|
- }
|
|
+ log_device_debug(dev_db, "Device claims priority %i for '%s'", db_prio, stackdir);
|
|
|
|
- priority = tmp_prio;
|
|
+ r = free_and_strdup(&target, devnode);
|
|
+ if (r < 0)
|
|
+ return r;
|
|
+ priority = db_prio;
|
|
}
|
|
|
|
*ret = TAKE_PTR(target);
|
|
@@ -266,160 +250,12 @@ toolong:
|
|
return size - 1;
|
|
}
|
|
|
|
-static int update_timestamp(sd_device *dev, const char *path, struct stat *prev) {
|
|
- assert(path);
|
|
- assert(prev);
|
|
-
|
|
- /* Even if a symlink in the stack directory is created/removed, the mtime of the directory may
|
|
- * not be changed. Why? Let's consider the following situation. For simplicity, let's assume
|
|
- * there exist two udev workers (A and B) and all of them calls link_update() for the same
|
|
- * devlink simultaneously.
|
|
- *
|
|
- * 1. A creates/removes a symlink in the stack directory.
|
|
- * 2. A calls the first stat() in the loop of link_update().
|
|
- * 3. A calls link_find_prioritized().
|
|
- * 4. B creates/removes another symlink in the stack directory, so the result of the step 3 is outdated.
|
|
- * 5. B finishes link_update().
|
|
- * 6. A creates/removes devlink according to the outdated result in the step 3.
|
|
- * 7. A calls the second stat() in the loop of link_update().
|
|
- *
|
|
- * If these 7 steps are processed in this order within a short time period that kernel's timer
|
|
- * does not increase, then even if the contents in the stack directory is changed, the results
|
|
- * of two stat() called by A shows the same timestamp, and A cannot detect the change.
|
|
- *
|
|
- * By calling this function after creating/removing symlinks in the stack directory, the
|
|
- * timestamp of the stack directory is always increased at least in the above step 5, so A can
|
|
- * detect the update. */
|
|
-
|
|
- if ((prev->st_mode & S_IFMT) == 0)
|
|
- return 0; /* Does not exist, or previous stat() failed. */
|
|
-
|
|
- for (unsigned i = 0; i < UPDATE_TIMESTAMP_MAX_RETRIES; i++) {
|
|
- struct stat st;
|
|
-
|
|
- if (stat(path, &st) < 0)
|
|
- return -errno;
|
|
-
|
|
- if (!stat_inode_unmodified(prev, &st))
|
|
- return 0;
|
|
-
|
|
- log_device_debug(dev,
|
|
- "%s is modified, but its timestamp is not changed, "
|
|
- "updating timestamp after 10ms.",
|
|
- path);
|
|
-
|
|
- (void) usleep(10 * USEC_PER_MSEC);
|
|
- if (utimensat(AT_FDCWD, path, NULL, 0) < 0)
|
|
- return -errno;
|
|
- }
|
|
-
|
|
- return -ELOOP;
|
|
-}
|
|
-
|
|
-static int update_stack_directory(sd_device *dev, const char *dirname, bool add) {
|
|
- _cleanup_free_ char *filename = NULL, *data = NULL, *buf = NULL;
|
|
- const char *devname, *id;
|
|
- struct stat st = {};
|
|
- int priority, r;
|
|
-
|
|
- assert(dev);
|
|
- assert(dirname);
|
|
-
|
|
- r = device_get_device_id(dev, &id);
|
|
- if (r < 0)
|
|
- return log_device_debug_errno(dev, r, "Failed to get device id: %m");
|
|
-
|
|
- filename = path_join(dirname, id);
|
|
- if (!filename)
|
|
- return log_oom_debug();
|
|
-
|
|
- if (!add) {
|
|
- int unlink_error = 0, stat_error = 0;
|
|
-
|
|
- if (stat(dirname, &st) < 0) {
|
|
- if (errno == ENOENT)
|
|
- return 0; /* The stack directory is already removed. That's OK. */
|
|
- stat_error = -errno;
|
|
- }
|
|
-
|
|
- if (unlink(filename) < 0)
|
|
- unlink_error = -errno;
|
|
-
|
|
- if (rmdir(dirname) >= 0 || errno == ENOENT)
|
|
- return 0;
|
|
-
|
|
- if (unlink_error < 0) {
|
|
- if (unlink_error == -ENOENT)
|
|
- return 0;
|
|
-
|
|
- /* If we failed to remove the symlink, then there is almost nothing we can do. */
|
|
- return log_device_debug_errno(dev, unlink_error, "Failed to remove %s: %m", filename);
|
|
- }
|
|
-
|
|
- if (stat_error < 0)
|
|
- return log_device_debug_errno(dev, stat_error, "Failed to stat %s: %m", dirname);
|
|
-
|
|
- /* The symlink was removed. Check if the timestamp of directory is changed. */
|
|
- r = update_timestamp(dev, dirname, &st);
|
|
- if (r < 0 && r != -ENOENT)
|
|
- return log_device_debug_errno(dev, r, "Failed to update timestamp of %s: %m", dirname);
|
|
-
|
|
- return 0;
|
|
- }
|
|
-
|
|
- r = sd_device_get_devname(dev, &devname);
|
|
- if (r < 0)
|
|
- return log_device_debug_errno(dev, r, "Failed to get device node: %m");
|
|
-
|
|
- r = device_get_devlink_priority(dev, &priority);
|
|
- if (r < 0)
|
|
- return log_device_debug_errno(dev, r, "Failed to get priority of device node symlink: %m");
|
|
-
|
|
- if (asprintf(&data, "%i:%s", priority, devname) < 0)
|
|
- return log_oom_debug();
|
|
-
|
|
- if (readlink_malloc(filename, &buf) >= 0 && streq(buf, data))
|
|
- return 0;
|
|
-
|
|
- if (unlink(filename) < 0 && errno != ENOENT)
|
|
- log_device_debug_errno(dev, errno, "Failed to remove %s, ignoring: %m", filename);
|
|
-
|
|
- for (unsigned j = 0; j < CREATE_STACK_LINK_MAX_RETRIES; j++) {
|
|
- /* This may fail with -ENOENT when the parent directory is removed during
|
|
- * creating the file by another udevd worker. */
|
|
- r = mkdir_p(dirname, 0755);
|
|
- if (r == -ENOENT)
|
|
- continue;
|
|
- if (r < 0)
|
|
- return log_device_debug_errno(dev, r, "Failed to create directory %s: %m", dirname);
|
|
-
|
|
- if (stat(dirname, &st) < 0) {
|
|
- if (errno == ENOENT)
|
|
- continue;
|
|
- return log_device_debug_errno(dev, errno, "Failed to stat %s: %m", dirname);
|
|
- }
|
|
-
|
|
- if (symlink(data, filename) < 0) {
|
|
- if (errno == ENOENT)
|
|
- continue;
|
|
- return log_device_debug_errno(dev, errno, "Failed to create symbolic link %s: %m", filename);
|
|
- }
|
|
-
|
|
- /* The symlink was created. Check if the timestamp of directory is changed. */
|
|
- r = update_timestamp(dev, dirname, &st);
|
|
- if (r < 0)
|
|
- return log_device_debug_errno(dev, r, "Failed to update timestamp of %s: %m", dirname);
|
|
-
|
|
- return 0;
|
|
- }
|
|
-
|
|
- return log_device_debug_errno(dev, SYNTHETIC_ERRNO(ELOOP), "Failed to create symbolic link %s: %m", filename);
|
|
-}
|
|
-
|
|
/* manage "stack of names" with possibly specified device priorities */
|
|
static int link_update(sd_device *dev, const char *slink_in, bool add) {
|
|
- _cleanup_free_ char *slink = NULL, *dirname = NULL;
|
|
- const char *slink_name;
|
|
+ _cleanup_(release_lock_file) LockFile lf = LOCK_FILE_INIT;
|
|
+ _cleanup_free_ char *slink = NULL, *filename = NULL, *dirname = NULL;
|
|
+ _cleanup_free_ char *target = NULL;
|
|
+ const char *slink_name, *id;
|
|
char name_enc[NAME_MAX+1];
|
|
int r;
|
|
|
|
@@ -439,57 +275,56 @@ static int link_update(sd_device *dev, const char *slink_in, bool add) {
|
|
return log_device_debug_errno(dev, SYNTHETIC_ERRNO(EINVAL),
|
|
"Invalid symbolic link of device node: %s", slink);
|
|
|
|
+ r = device_get_device_id(dev, &id);
|
|
+ if (r < 0)
|
|
+ return log_device_debug_errno(dev, r, "Failed to get device id: %m");
|
|
+
|
|
(void) udev_node_escape_path(slink_name, name_enc, sizeof(name_enc));
|
|
dirname = path_join("/run/udev/links", name_enc);
|
|
if (!dirname)
|
|
return log_oom_debug();
|
|
|
|
- r = update_stack_directory(dev, dirname, add);
|
|
- if (r < 0)
|
|
- return r;
|
|
-
|
|
- for (unsigned i = 0; i < LINK_UPDATE_MAX_RETRIES; i++) {
|
|
- _cleanup_free_ char *target = NULL;
|
|
- struct stat st1 = {}, st2 = {};
|
|
-
|
|
- if (i > 0) {
|
|
- char buf[FORMAT_TIMESPAN_MAX];
|
|
- usec_t delay = MIN_RANDOM_DELAY + random_u64_range(MAX_RANDOM_DELAY - MIN_RANDOM_DELAY);
|
|
+ filename = path_join(dirname, id);
|
|
+ if (!filename)
|
|
+ return log_oom_debug();
|
|
|
|
- log_device_debug(dev, "Directory %s was updated, retrying to update devlink %s after %s.",
|
|
- dirname, slink, format_timespan(buf, sizeof(buf), delay, USEC_PER_MSEC));
|
|
- (void) usleep(delay);
|
|
- }
|
|
+ mkdir_parents(dirname, 0755);
|
|
+ r = make_lock_file_for(dirname, LOCK_EX, &lf);
|
|
+ if (r < 0)
|
|
+ return log_error_errno(r, "failed to lock %s", dirname);
|
|
|
|
- if (stat(dirname, &st1) < 0 && errno != ENOENT)
|
|
- return log_device_debug_errno(dev, errno, "Failed to stat %s: %m", dirname);
|
|
+ if (!add) {
|
|
+ if (unlink(filename) < 0 && errno != ENOENT)
|
|
+ log_device_error_errno(dev, errno, "Failed to remove %s, ignoring: %m", filename);
|
|
|
|
- r = link_find_prioritized(dev, add, dirname, &target);
|
|
+ (void) rmdir(dirname);
|
|
+ } else {
|
|
+ r = touch_file(filename, true, USEC_INFINITY, UID_INVALID, GID_INVALID, 0444);
|
|
if (r < 0)
|
|
- return log_device_debug_errno(dev, r, "Failed to determine device node with the highest priority for '%s': %m", slink);
|
|
- if (r == 0) {
|
|
+ return log_device_error_errno(dev, r, "Failed to create %s: %m", filename);
|
|
+ }
|
|
+
|
|
+ r = link_find_prioritized(dev, add, dirname, &target);
|
|
+ if (r < 0)
|
|
+ return log_device_error_errno(dev, r, "Failed to find highest priority for symlink '%s': %m", slink);
|
|
+ if (r == 0) {
|
|
+ if (!add) {
|
|
log_device_debug(dev, "No reference left for '%s', removing", slink);
|
|
|
|
- if (unlink(slink) < 0 && errno != ENOENT)
|
|
- log_device_debug_errno(dev, errno, "Failed to remove '%s', ignoring: %m", slink);
|
|
+ if (unlink(slink) < 0)
|
|
+ return log_device_error_errno(dev, errno,
|
|
+ "Failed to remove '%s', ignoring: %m", slink);
|
|
|
|
(void) rmdir_parents(slink, "/dev");
|
|
- return 0;
|
|
}
|
|
-
|
|
- r = node_symlink(dev, target, slink);
|
|
- if (r < 0)
|
|
- return r;
|
|
-
|
|
- if (stat(dirname, &st2) < 0 && errno != ENOENT)
|
|
- return log_device_debug_errno(dev, errno, "Failed to stat %s: %m", dirname);
|
|
-
|
|
- if (((st1.st_mode & S_IFMT) == 0 && (st2.st_mode & S_IFMT) == 0) ||
|
|
- stat_inode_unmodified(&st1, &st2))
|
|
- return 0;
|
|
+ return 0;
|
|
}
|
|
|
|
- return -ELOOP;
|
|
+ r = node_symlink(dev, target, slink);
|
|
+ if (r < 0)
|
|
+ (void) unlink(filename);
|
|
+
|
|
+ return r;
|
|
}
|
|
|
|
static int device_get_devpath_by_devnum(sd_device *dev, char **ret) {
|
|
--
|
|
2.35.3
|
|
|