Login
[x]
Log in using an account from:
Fedora Account System
Red Hat Associate
Red Hat Customer
Or login using a Red Hat Bugzilla account
Forgot Password
Login:
Hide Forgot
Create an Account
Red Hat Bugzilla – Attachment 912745 Details for
Bug 850426
gfs2: Add xgetdents syscall to the kernel
[?]
New
Simple Search
Advanced Search
My Links
Browse
Requests
Reports
Current State
Search
Tabular reports
Graphical reports
Duplicates
Other Reports
User Changes
Plotly Reports
Bug Status
Bug Severity
Non-Defaults
|
Product Dashboard
Help
Page Help!
Bug Writing Guidelines
What's new
Browser Support Policy
5.0.4.rh83 Release notes
FAQ
Guides index
User guide
Web Services
Contact
Legal
This site requires JavaScript to be enabled to function correctly, please enable it.
[patch]
Revised readahead syscall patch after discussions with swhiteho
readahead-async-works-vfs-alloc.patch (text/plain), 13.42 KB, created by
Abhijith Das
on 2014-06-27 10:05:27 UTC
(
hide
)
Description:
Revised readahead syscall patch after discussions with swhiteho
Filename:
MIME Type:
Creator:
Abhijith Das
Created:
2014-06-27 10:05:27 UTC
Size:
13.42 KB
patch
obsolete
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index d6b8679..3e0ef85 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -360,3 +360,4 @@
 351	i386	sched_setattr		sys_sched_setattr
 352	i386	sched_getattr		sys_sched_getattr
 353	i386	renameat2		sys_renameat2
+354	i386	dirreadahead		sys_dirreadahead
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index 04376ac..11d516d2 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -323,6 +323,7 @@
 314	common	sched_setattr		sys_sched_setattr
 315	common	sched_getattr		sys_sched_getattr
 316	common	renameat2		sys_renameat2
+317	common	dirreadahead		sys_dirreadahead
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile
index 8612820..2765c83 100644
--- a/fs/gfs2/Makefile
+++ b/fs/gfs2/Makefile
@@ -4,7 +4,8 @@ gfs2-y := acl.o bmap.o dir.o xattr.o glock.o \
 	glops.o log.o lops.o main.o meta_io.o \
 	aops.o dentry.o export.o file.o \
 	ops_fstype.o inode.o quota.o \
-	recovery.o rgrp.o super.o sys.o trans.o util.o
+	recovery.o rgrp.o super.o sys.o trans.o \
+	dir_readahead.o util.o
 
 gfs2-$(CONFIG_GFS2_FS_LOCKING_DLM) += lock_dlm.o
 
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 1a349f9..1392a23 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -1217,6 +1217,20 @@ static int compare_dents(const void *a, const void *b)
 	return ret;
 }
 
+static int gfs2_dirent_dot_or_dotdot(const struct gfs2_dirent *dent)
+{
+	const char *name = (const char *)(dent + 1);
+
+	if (be16_to_cpu(dent->de_type) == DT_DIR) {
+		if (be16_to_cpu(dent->de_name_len) == 1 && name[0] == '.')
+			return 1;
+		if (be16_to_cpu(dent->de_name_len) == 2 &&
+		    strncmp(name, "..", 2) == 0)
+			return 1;
+	}
+	return 0;
+}
+
 /**
  * do_filldir_main - read out directory entries
  * @dip: The GFS2 inode
@@ -1273,11 +1287,18 @@ static int do_filldir_main(struct gfs2_inode *dip, struct dir_context *ctx,
 			ctx->pos = off;
 		}
 
-		if (!dir_emit(ctx, (const char *)(dent + 1),
-				be16_to_cpu(dent->de_name_len),
-				be64_to_cpu(dent->de_inum.no_addr),
-				be16_to_cpu(dent->de_type)))
-			return 1;
+		if (ctx->actor) {
+			if (!dir_emit(ctx, (const char *)(dent + 1),
+				      be16_to_cpu(dent->de_name_len),
+				      be64_to_cpu(dent->de_inum.no_addr),
+				      be16_to_cpu(dent->de_type)))
+				return 1;
+		} else { /* we were called by dir_readahead */
+			if (gfs2_dirent_dot_or_dotdot(dent))
+				continue;
+			if (collect_inode_blocks(ctx, be64_to_cpu(dent->de_inum.no_addr)))
+				return 1;
+		}
 
 		*copied = 1;
 	}
@@ -1311,8 +1332,7 @@ static void gfs2_free_sort_buffer(void *ptr)
 }
 
 static int gfs2_dir_read_leaf(struct inode *inode, struct dir_context *ctx,
-			      int *copied, unsigned *depth,
-			      u64 leaf_no)
+			      int *copied, unsigned *depth, u64 leaf_no)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
@@ -1399,14 +1419,14 @@ out:
 }
 
 /**
- * gfs2_dir_readahead - Issue read-ahead requests for leaf blocks.
+ * gfs2_dir_leaf_ra - Issue read-ahead requests for leaf blocks.
  *
  * Note: we can't calculate each index like dir_e_read can because we don't
  * have the leaf, and therefore we don't have the depth, and therefore we
  * don't have the length. So we have to just read enough ahead to make up
  * for the loss of information.
  */
-static void gfs2_dir_readahead(struct inode *inode, unsigned hsize, u32 index,
+static void gfs2_dir_leaf_ra(struct inode *inode, unsigned hsize, u32 index,
 			       struct file_ra_state *f_ra)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
@@ -1474,11 +1494,10 @@ static int dir_e_read(struct inode *inode, struct dir_context *ctx,
 	if (IS_ERR(lp))
 		return PTR_ERR(lp);
 
-	gfs2_dir_readahead(inode, hsize, index, f_ra);
+	gfs2_dir_leaf_ra(inode, hsize, index, f_ra);
 
 	while (index < hsize) {
-		error = gfs2_dir_read_leaf(inode, ctx,
-					   &copied, &depth,
+		error = gfs2_dir_read_leaf(inode, ctx, &copied, &depth,
 					   be64_to_cpu(lp[index]));
 		if (error)
 			break;
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h
index 126c65d..65fdafd 100644
--- a/fs/gfs2/dir.h
+++ b/fs/gfs2/dir.h
@@ -25,6 +25,10 @@ struct gfs2_diradd {
 	struct buffer_head *bh;
 };
 
+extern struct workqueue_struct *gfs2_dir_ra_wq;
+
+extern int gfs2_dir_readahead(struct file *file, struct dir_context *ctx);
+extern int collect_inode_blocks(struct dir_context *ctx, u64 ino);
 extern struct inode *gfs2_dir_search(struct inode *dir,
 				     const struct qstr *filename,
 				     bool fail_on_exist);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 6ab0cfb..ecf7441 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -1077,6 +1077,7 @@ const struct file_operations gfs2_file_fops = {
 
 const struct file_operations gfs2_dir_fops = {
 	.iterate	= gfs2_readdir,
+	.dir_readahead	= gfs2_dir_readahead,
 	.unlocked_ioctl	= gfs2_ioctl,
 	.open		= gfs2_open,
 	.release	= gfs2_release,
@@ -1107,6 +1108,7 @@ const struct file_operations gfs2_file_fops_nolock = {
 
 const struct file_operations gfs2_dir_fops_nolock = {
 	.iterate	= gfs2_readdir,
+	.dir_readahead	= gfs2_dir_readahead,
 	.unlocked_ioctl	= gfs2_ioctl,
 	.open		= gfs2_open,
 	.release	= gfs2_release,
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 82b6ac8..71e8ce5 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -161,9 +161,14 @@ static int __init init_gfs2_fs(void)
 	if (!gfs2_control_wq)
 		goto fail_recovery;
 
+	gfs2_dir_ra_wq = alloc_workqueue("gfs2_dir_ra",
+					 WQ_MEM_RECLAIM | WQ_FREEZABLE, 0);
+	if (!gfs2_dir_ra_wq)
+		goto fail_control;
+
 	gfs2_page_pool = mempool_create_page_pool(64, 0);
 	if (!gfs2_page_pool)
-		goto fail_control;
+		goto fail_ra;
 
 	gfs2_register_debugfs();
 
@@ -171,6 +176,8 @@ static int __init init_gfs2_fs(void)
 
 	return 0;
 
+fail_ra:
+	destroy_workqueue(gfs2_dir_ra_wq);
 fail_control:
 	destroy_workqueue(gfs2_control_wq);
 fail_recovery:
@@ -224,6 +231,7 @@ static void __exit exit_gfs2_fs(void)
 	unregister_filesystem(&gfs2meta_fs_type);
 	destroy_workqueue(gfs_recovery_wq);
 	destroy_workqueue(gfs2_control_wq);
+	destroy_workqueue(gfs2_dir_ra_wq);
 	list_lru_destroy(&gfs2_qd_lru);
 
 	rcu_barrier();
diff --git a/fs/readdir.c b/fs/readdir.c
index 5b53d99..96e4a4b 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -302,3 +302,121 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd,
 	fdput(f);
 	return error;
 }
+
+/*
+ * compute_ra_bufsize - bytes needed to hold @count 64-bit block numbers,
+ * clamped to the range [KMALLOC_MIN_SIZE, KMALLOC_MAX_SIZE].
+ */
+static inline unsigned int compute_ra_bufsize(unsigned int count)
+{
+	/*
+	 * Clamp before multiplying: count * sizeof(u64) does not fit in an
+	 * unsigned int for large counts and would be silently truncated.
+	 */
+	if (count > KMALLOC_MAX_SIZE / sizeof(u64))
+		return KMALLOC_MAX_SIZE;
+	if (count * sizeof(u64) < KMALLOC_MIN_SIZE)
+		return KMALLOC_MIN_SIZE;
+
+	return count * sizeof(u64);
+}
+
+/* Free the collection buffer and clear @ctx; a NULL @ctx is ignored. */
+static void uninit_ra_dir_context(struct dir_context *ctx)
+{
+	if (!ctx)
+		return;
+	kfree(ctx->ra.buf);	/* kfree(NULL) is a no-op */
+	memset(ctx, 0, sizeof(*ctx));
+}
+
+/*
+ * init_ra_dir_context - zero @ctx and allocate its collection buffer
+ *
+ * ctx->actor is left NULL; gfs2's do_filldir_main() uses that to tell a
+ * readahead walk from a readdir. Returns 0, -EINVAL for a NULL @ctx or a
+ * zero @count, or -ENOMEM if the buffer cannot be allocated.
+ */
+static int init_ra_dir_context(struct dir_context *ctx, loff_t off, unsigned int count)
+{
+	unsigned int bufsize;
+
+	if (!ctx || !count)
+		return -EINVAL;
+
+	memset(ctx, 0, sizeof(*ctx));
+	bufsize = compute_ra_bufsize(count);
+	ctx->ra.buf = kmalloc(bufsize, GFP_NOFS | __GFP_NOWARN);
+	if (!ctx->ra.buf)
+		return -ENOMEM;
+
+	ctx->ra.size = bufsize;
+	ctx->ra.req = count;
+	ctx->pos = off;
+
+	return 0;
+}
+
+/*
+ * dirreadahead - ask the filesystem to read ahead the inodes of a directory
+ * @fd: open directory whose file_operations provide ->dir_readahead()
+ * @offset: user pointer to the directory position; read before the call and
+ *          written back from ctx.pos afterwards
+ * @count: number of entries to read ahead; must be non-zero
+ *
+ * Returns the ->dir_readahead() result, or a negative errno.
+ */
+SYSCALL_DEFINE3(dirreadahead, unsigned int, fd,
+		loff_t __user *, offset, unsigned int, count)
+{
+	struct fd f;
+	struct inode *inode;
+	int error;
+	loff_t off = 0;
+	struct dir_context ctx;
+
+	f = fdget(fd);
+	if (!f.file)
+		return -EBADF;
+
+	inode = f.file->f_path.dentry->d_inode;
+
+	error = -EINVAL;
+	if (!count)
+		goto out;
+
+	error = -ENOTSUPP;
+	if (!f.file->f_op || !f.file->f_op->dir_readahead)
+		goto out;
+
+	error = security_file_permission(f.file, MAY_READ);
+	if (error)
+		goto out;
+
+	/* copy_from_user() checks the user pointer; __get_user() does not */
+	error = -EFAULT;
+	if (copy_from_user(&off, offset, sizeof(off)))
+		goto out;
+
+	error = init_ra_dir_context(&ctx, off, count);
+	if (error)
+		goto out;
+
+	error = mutex_lock_killable(&inode->i_mutex);
+	if (error)
+		goto out_free;	/* the buffer is already allocated here */
+
+	error = -ENOENT;
+	if (!IS_DEADDIR(inode)) {
+		error = f.file->f_op->dir_readahead(f.file, &ctx);
+		if (copy_to_user(offset, &ctx.pos, sizeof(ctx.pos)))
+			error = -EFAULT;
+		file_accessed(f.file);
+	}
+	mutex_unlock(&inode->i_mutex);
+out_free:
+	uninit_ra_dir_context(&ctx);
+out:
+	fdput(f);
+	return error;
+}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7a9c5bc..b0aa510 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1435,9 +1435,18 @@ int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags);
  * to have different dirent layouts depending on the binary type.
  */
 typedef int (*filldir_t)(void *, const char *, int, loff_t, u64, unsigned);
+
+struct ra_buf {
+	void *buf;		/* Buffer used by fs to save inode numbers */
+	size_t size;		/* Size of the buffer, in bytes */
+	size_t count;		/* Number of valid inode numbers in buf */
+	unsigned int req;	/* Requested number of readaheads */
+};
+
 struct dir_context {
 	const filldir_t actor;
 	loff_t pos;
+	struct ra_buf ra;
 };
 
 struct block_device_operations;
@@ -1456,6 +1465,7 @@ struct file_operations {
 	ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
 	ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
 	int (*iterate) (struct file *, struct dir_context *);
+	int (*dir_readahead) (struct file *, struct dir_context *);
 	unsigned int (*poll) (struct file *, struct poll_table_struct *);
 	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
 	long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
diff --git a/fs/gfs2/dir_readahead.c b/fs/gfs2/dir_readahead.c
new file mode 100644
index 0000000..ba5e554
--- /dev/null
+++ b/fs/gfs2/dir_readahead.c
@@ -0,0 +1,179 @@
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
+#include <linux/pagemap.h>
+#include <linux/uio.h>
+#include <linux/blkdev.h>
+#include <linux/mm.h>
+#include <linux/mount.h>
+#include <linux/sort.h>
+#include <linux/fs.h>
+#include <linux/gfs2_ondisk.h>
+#include <linux/falloc.h>
+#include <linux/swap.h>
+#include <linux/crc32.h>
+#include <linux/writeback.h>
+#include <asm/uaccess.h>
+#include <linux/dlm.h>
+#include <linux/dlm_plock.h>
+
+#include "gfs2.h"
+#include "incore.h"
+#include "bmap.h"
+#include "dir.h"
+#include "glock.h"
+#include "glops.h"
+#include "inode.h"
+#include "log.h"
+#include "meta_io.h"
+#include "quota.h"
+#include "rgrp.h"
+#include "trans.h"
+#include "util.h"
+
+struct workqueue_struct *gfs2_dir_ra_wq;
+
+/* sort() comparator: ascending order of the collected 64-bit values */
+static int compare_inos(const void *a, const void *b)
+{
+	u64 ino_a = *(const u64 *)a;
+	u64 ino_b = *(const u64 *)b;
+
+	if (ino_a > ino_b)
+		return 1;
+	if (ino_a < ino_b)
+		return -1;
+	return 0;
+}
+
+/*
+ * True while fewer entries than requested have been collected and the
+ * buffer still has a free slot. ra.size is in bytes, so it is converted
+ * to a number of u64 slots before being compared with the entry count.
+ */
+static int collect_more(struct dir_context *ctx)
+{
+	return ctx->ra.count < ctx->ra.req &&
+	       ctx->ra.count < ctx->ra.size / sizeof(u64);
+}
+
+/**
+ * collect_inode_blocks - store one inode address in the readahead buffer
+ * @ctx: readahead context that owns the buffer
+ * @ino: inode address taken from the directory entry
+ *
+ * Returns: 0 if @ino was stored, 1 if no more entries are wanted or fit
+ */
+int collect_inode_blocks(struct dir_context *ctx, u64 ino)
+{
+	if (!collect_more(ctx))
+		return 1; /* Collected requested blocks */
+	((u64 *)ctx->ra.buf)[ctx->ra.count++] = ino;
+	return 0;
+}
+
+/* One queued readahead request: the inode address and its filesystem */
+struct dir_ra_work {
+	struct work_struct work;
+	u64 ino;
+	struct gfs2_sbd *sdp;
+};
+
+/*
+ * Workqueue handler: look the inode up and refresh it, then free the work
+ * item. A failed lookup is logged and nothing else is done with the result.
+ */
+static void dir_ra_work_func(struct work_struct *work)
+{
+	struct dir_ra_work *w = container_of(work, struct dir_ra_work, work);
+	struct inode *inode;
+
+	/* XXX: What to do if sdp disappears by the time we get here? */
+	inode = gfs2_lookup_by_inum(w->sdp, w->ino, NULL, GFS2_BLKST_DINODE);
+	if (IS_ERR(inode)) {
+		fs_err(w->sdp, "can't read in inode at addr:%llu: %ld\n",
+		       (unsigned long long)w->ino, PTR_ERR(inode));
+		goto out; /* never refresh or iput() an error pointer */
+	}
+	gfs2_inode_refresh(GFS2_I(inode));
+	iput(inode);
+out:
+	kfree(w);
+}
+
+/*
+ * Sort the collected inode addresses and queue one work item per entry on
+ * gfs2_dir_ra_wq.
+ *
+ * Returns: the number of work items queued, or -ENOMEM if none could be
+ * allocated. ctx->pos is reset to 0 when only some entries were queued.
+ */
+int gfs2_queue_dir_ra(struct dir_context *ctx, struct gfs2_sbd *sdp)
+{
+	int i;
+
+	sort(ctx->ra.buf, ctx->ra.count, sizeof(u64), compare_inos, NULL);
+
+	for (i = 0; i < ctx->ra.count; i++) {
+		struct dir_ra_work *w;
+
+		w = kmalloc(sizeof(*w), GFP_NOFS | __GFP_NOWARN);
+		if (!w)
+			break;
+
+		w->ino = ((u64 *)ctx->ra.buf)[i];
+		w->sdp = sdp;
+		INIT_WORK(&w->work, dir_ra_work_func);
+		queue_work(gfs2_dir_ra_wq, &w->work);
+	}
+	if (!i)
+		return -ENOMEM;
+	if (i != ctx->ra.count)
+		ctx->pos = 0; /* Don't know the resume offset for a short RA */
+	return i;
+}
+
+/**
+ * gfs2_dir_readahead - GFS2's implementation of readdir readahead
+ * @file : The directory to be read from
+ * @ctx : Context contains buffer to collect inode numbers
+ *
+ * Readahead inode disk blocks (and extended attribute blocks if requested)
+ * of every directory entry
+ *
+ * Returns: +ve number: The number of entries for which readahead calls
+ *                      were issued
+ *          -ve values: For error conditions
+ */
+int gfs2_dir_readahead(struct file *file, struct dir_context *ctx)
+{
+	int error = 0;
+	struct inode *dir = file->f_mapping->host;
+	struct gfs2_inode *dip = GFS2_I(dir);
+	struct gfs2_holder d_gh;
+
+	printk(KERN_WARNING "Start gfs2_rddir_ra syscall\n");
+
+	gfs2_holder_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
+	error = gfs2_glock_nq(&d_gh);
+	if (error) {
+		gfs2_holder_uninit(&d_gh);
+		goto out;
+	}
+
+	error = gfs2_dir_read(dir, ctx, &file->f_ra);
+	if (error < 0)
+		goto unlock;
+
+	printk(KERN_WARNING " Collected %u entries\n", (unsigned int) ctx->ra.count);
+
+	/* Pass the collected inos to the workqueues to be read ahead */
+	if (ctx->ra.count)
+		error = gfs2_queue_dir_ra(ctx, GFS2_SB(dir));
+unlock:
+	gfs2_glock_dq_uninit(&d_gh);
+out:
+	printk(KERN_WARNING "End gfs2_rddir_ra syscall, rv:%d\n", error);
+	return error;
+}
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 850426
:
605960
|
617073
|
625592
|
678841
|
680222
|
680282
|
708389
|
731269
|
731270
|
731272
|
731273
|
731274
|
731278
|
731279
|
732739
|
732740
|
732845
|
834840
|
853502
|
909528
|
909529
|
909534
|
912745
|
917238
|
917239
|
920895
|
920896
|
920897
|
920898
|
920899
|
920902
|
920903
|
947987