Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1750946AbdCNKlA (ORCPT ); Tue, 14 Mar 2017 06:41:00 -0400 Received: from mail-ot0-f169.google.com ([74.125.82.169]:36806 "EHLO mail-ot0-f169.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1750785AbdCNKk6 (ORCPT ); Tue, 14 Mar 2017 06:40:58 -0400 MIME-Version: 1.0 In-Reply-To: <20170313231624.uhqwehzihqnakked@rgvaio> References: <20170313231624.uhqwehzihqnakked@rgvaio> From: Amir Goldstein Date: Tue, 14 Mar 2017 12:40:56 +0200 Message-ID: Subject: Re: [RFC 1/2] fanotify: new event FAN_MODIFY_DIR To: =?UTF-8?B?RmlsaXAgxaB0xJtkcm9uc2vDvQ==?= Cc: linux-fsdevel , linux-kernel , Jan Kara , Alexander Viro Content-Type: text/plain; charset=UTF-8 Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: 8bit X-MIME-Autoconverted: from quoted-printable to 8bit by mail.home.local id v2EAfFaU011700 Content-Length: 9786 Lines: 281 On Tue, Mar 14, 2017 at 1:16 AM, Filip Štědronský wrote: > An example userspace program that uses FAN_MODIFY_DIR to reliably keep > an up-to-date internal representation of the file system. It uses some > filehandle trickery to identify inodes, other heuristics could be also > used. > An I am very happy that you used filehandles to keep track of objects in your example, because it fits my proposal really well. See, if you used my proposed API, you would have had fan_fd = CHK(fanotify_init(FAN_UNLIMITED_QUEUE| \ FAN_EVENT_INFO_PARENT | FAN_EVENT_INFO_FH, O_RDONLY)); And then you would have gotten the filhandle of parent with the event and you would need find_inode(). Furthermore, my proposal records the filehandle at the time of the event and therefore, does not have to keep an elevated refcount of the object in the events queue. Again, this needs some work, but I will try to post a testing branch which demonstrates how my patches should be used with this test program. Thanks again for sharing it. Amir. > --- > > //#define _GNU_SOURCE > > #include > #include > #include > #include > #include > #include > #include > #include > #include > #include > #include > > #include > #include > #include > using namespace std; > > #ifndef FAN_MODIFY_DIR > #define FAN_MODIFY_DIR 0x00040000 > #endif > > // die-on-error helpers > #define CHK(x) ({ __typeof__(x) r = x; if (r == -1) { perror(#x); abort(); } r; }) > #define CHKN(x) ({ __typeof__(x) r = x; if (r == NULL) { perror(#x); abort(); } r; }) > struct inode_info; > struct dentry_info; > > struct inode_info { > ino_t ino; > mode_t mode; > char handle[MAX_HANDLE_SZ]; > set links; > map children; // for directory inodes > }; > > struct dentry_info { > struct inode_info *parent, *inode; > string name; > }; > > > map inodes; > > int root_fd; > int fan_fd; > > bool compare_handles(const void *h1, const void *h2) { > const struct file_handle *fh1 = (const struct file_handle*) h1; > const struct file_handle *fh2 = (const struct file_handle*) h2; > return (fh1->handle_bytes == fh2->handle_bytes > && memcmp(h1, h2, fh1->handle_bytes) == 0); > } > > bool handle_valid(void *handle) { > int check_fd = open_by_handle_at(root_fd, (struct file_handle*)handle, O_PATH); > if (check_fd >= 0) { > CHK(close(check_fd)); > return true; > } else if (errno == ESTALE) { > return false; > } else { > perror("open_by_handle_at"); > exit(1); > } > } > > // Get the path corresponding to an inode (one of its paths, in the presence of > // hardlinks). > void inode_path(const struct inode_info *inode, char *buf, size_t bufsiz) { > list components; > while (true) { > if (inode->links.empty()) break; > struct dentry_info *dentry = *inode->links.begin(); > components.push_front(dentry->name); > inode = dentry->parent; > } > buf[0] = '\0'; > for (auto name: components) { > int len = snprintf(buf, bufsiz, "/%s", name.c_str()); > buf += len; > bufsiz -= len; > } > } > > > void delete_dentry(struct dentry_info *dentry) { > assert(dentry->parent->children[dentry->name] == dentry); > > char path_buf[4096]; > inode_path(dentry->parent, path_buf, sizeof(path_buf)); > printf("unlinked %s/%s (ino %lu, parent %lu)\n", path_buf, dentry->name.c_str(), > dentry->inode->ino, dentry->parent->ino); > > dentry->parent->children.erase(dentry->name.c_str()); > dentry->inode->links.erase(dentry); > // TODO: If this was the last dentry pointing to an inode, schedule removing > // the inode after a timeout (we cannot remove it immediately because > // the zero-link situation might occur during a rename when the source > // directory has been processed but the target directory hasn't). > delete dentry; > } > > struct dentry_info *add_dentry(struct inode_info *parent, const char *name, > struct inode_info *child) { > struct dentry_info *dentry = new dentry_info(); > dentry->parent = parent; > dentry->name = name; > dentry->inode = child; > parent->children[name] = dentry; > child->links.insert(dentry); > > char path_buf[4096] = "\0"; > inode_path(parent, path_buf, sizeof(path_buf)); > printf("linked %s/%s (ino %lu, parent %lu)\n", path_buf, name, child->ino, parent->ino); > > return dentry; > } > > void delete_inode(struct inode_info *inode) { > for (auto dentry: inode->links) { > delete_dentry(dentry); > } > delete inode; > } > > // Given a file descriptor, find the corresponding inode object in our database, > // or create a new one if it does not exist. An O_PATH fd suffices. > struct inode_info *find_inode(int fd) { > struct stat st; > CHK(fstat(fd, &st)); > char handle[sizeof(struct file_handle) + MAX_HANDLE_SZ]; > struct file_handle *fh = (struct file_handle*)handle; > fh->handle_bytes = sizeof(handle); > int mntid; > CHK(name_to_handle_at(fd, "", (struct file_handle*)handle, &mntid, > AT_EMPTY_PATH)); > > struct inode_info *info = inodes[st.st_ino]; > if (info) { > // Handles can refer to the same file despite not being equal. > // If the old handle can still be opened, we can be assured > // that the inode number has not been recycled. > if (compare_handles(handle, info->handle) || handle_valid(info->handle)) { > return info; > } else { > delete_inode(info); > info = NULL; > } > } > > inodes[st.st_ino] = info = new inode_info(); > info->ino = st.st_ino; > info->mode = st.st_mode; > memcpy(info->handle, handle, fh->handle_bytes); > return info; > } > > // Scan directory and update internal filesystem representation accordingly. > // Closes `dirfd`. > void scan(int dirfd, bool recursive) { > struct inode_info *dir = find_inode(dirfd); > > char path_buf[4096] = "\0"; > inode_path(dir, path_buf, sizeof(path_buf)); > printf("scan %s (%lu)\n", path_buf, dir->ino); > > DIR *dp = CHKN(fdopendir(dirfd)); > set seen; > while (struct dirent *ent = readdir(dp)) { > if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) continue; > seen.insert(ent->d_name); > if (dir->children.find(ent->d_name) != dir->children.end() > && dir->children[ent->d_name]->inode->ino == ent->d_ino) { > // Heuristic: It is massively unlikely that an inode number > // would be recylced at the same path as before. So if we > // see the same inode for the same child, we skip the more > // expensive checks altogether. This saves us a buttload of > // syscalls, especially given that most directory entries > // will be unchanged after a FAN_MODIFY_DIR. > // > // This can be skipped if strict correctness is preferred > // over speed. > continue; > } > int fd = openat(dirfd, ent->d_name, O_PATH|O_NOFOLLOW); > if (fd < 0) continue; > struct inode_info *child = find_inode(fd); > if (dir->children.find(ent->d_name) != dir->children.end()) { > struct dentry_info *old_dentry = dir->children[ent->d_name]; > if (child != old_dentry->inode) { > delete_dentry(old_dentry); > add_dentry(dir, ent->d_name, child); > } > } else { > add_dentry(dir, ent->d_name, child); > } > if (recursive && S_ISDIR(child->mode)) { > // `fd' is just an O_PATH fd. For scanning we need O_RDONLY. > int scan_fd = CHK(openat(fd, ".", O_RDONLY|O_DIRECTORY)); > scan(scan_fd, true); // closes scan_fd > } > close(fd); > } > for (auto it: dir->children) { > if (seen.find(it.second->name) == seen.end()) delete_dentry(it.second); > } > closedir(dp); > } > > void event_loop() { > while (true) { > char buf[4096]; > ssize_t len = CHK(read(fan_fd, buf, sizeof(buf))); > const struct fanotify_event_metadata *event; > event = (const struct fanotify_event_metadata*) buf; > while (FAN_EVENT_OK(event, len)) { > if (event->vers != FANOTIFY_METADATA_VERSION) abort(); > if (event->mask & FAN_MODIFY_DIR) { > scan(event->fd, false); > } else if (event->mask & FAN_Q_OVERFLOW) { > abort(); // TODO: full rescan needed > } else { > close(event->fd); > } > event = FAN_EVENT_NEXT(event, len); > } > } > } > > int main(int argc, char **argv) { > if (argc != 2) { fprintf(stderr, "Usage: %s MOUNTPOINT\n", argv[0]); return 1; } > > root_fd = CHK(open(argv[1], O_RDONLY|O_DIRECTORY)); > // In a real application, FAN_UNLIMITED_QUEUE would be replaced with a secondary > // userspace queue filled during scanning. > fan_fd = CHK(fanotify_init(FAN_UNLIMITED_QUEUE, O_RDONLY)); > CHK(fanotify_mark(fan_fd, FAN_MARK_ADD|FAN_MARK_MOUNT, FAN_MODIFY_DIR|FAN_ONDIR, > root_fd, NULL)); > > scan(dup(root_fd), true); > > event_loop(); > > return 0; > }