Login
[x]
Log in using an account from:
Fedora Account System
Red Hat Associate
Red Hat Customer
Or log in using a Red Hat Bugzilla account
Forgot Password
Login:
Hide Forgot
Create an Account
Red Hat Bugzilla – Attachment 917239 Details for
Bug 850426
gfs2: Add xgetdents syscall to the kernel
[?]
New
Simple Search
Advanced Search
My Links
Browse
Requests
Reports
Current State
Search
Tabular reports
Graphical reports
Duplicates
Other Reports
User Changes
Plotly Reports
Bug Status
Bug Severity
Non-Defaults
|
Product Dashboard
Help
Page Help!
Bug Writing Guidelines
What's new
Browser Support Policy
5.0.4.rh83 Release notes
FAQ
Guides index
User guide
Web Services
Contact
Legal
This site requires JavaScript to be enabled to function correctly; please enable it.
[patch]
Variant of the previous patch with allocation of buffer in the filesystem
readahead-async-works-gfs2-alloc-swhiteho-suggestions-06302014.patch (text/plain), 15.11 KB, created by
Abhijith Das
on 2014-07-11 03:43:39 UTC
(
hide
)
Description:
Variant of the previous patch with allocation of buffer in the filesystem
Filename:
MIME Type:
Creator:
Abhijith Das
Created:
2014-07-11 03:43:39 UTC
Size:
15.11 KB
patch
obsolete
>diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl >index d6b8679..3e0ef85 100644 >--- a/arch/x86/syscalls/syscall_32.tbl >+++ b/arch/x86/syscalls/syscall_32.tbl >@@ -360,3 +360,4 @@ > 351 i386 sched_setattr sys_sched_setattr > 352 i386 sched_getattr sys_sched_getattr > 353 i386 renameat2 sys_renameat2 >+354 i386 dirreadahead sys_dirreadahead >diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl >index 04376ac..11d516d2 100644 >--- a/arch/x86/syscalls/syscall_64.tbl >+++ b/arch/x86/syscalls/syscall_64.tbl >@@ -323,6 +323,7 @@ > 314 common sched_setattr sys_sched_setattr > 315 common sched_getattr sys_sched_getattr > 316 common renameat2 sys_renameat2 >+317 common dirreadahead sys_dirreadahead > > # > # x32-specific system call numbers start at 512 to avoid cache impact >diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile >index 8612820..2765c83 100644 >--- a/fs/gfs2/Makefile >+++ b/fs/gfs2/Makefile >@@ -4,7 +4,8 @@ gfs2-y := acl.o bmap.o dir.o xattr.o glock.o \ > glops.o log.o lops.o main.o meta_io.o \ > aops.o dentry.o export.o file.o \ > ops_fstype.o inode.o quota.o \ >- recovery.o rgrp.o super.o sys.o trans.o util.o >+ recovery.o rgrp.o super.o sys.o trans.o \ >+ dir_readahead.o util.o > > gfs2-$(CONFIG_GFS2_FS_LOCKING_DLM) += lock_dlm.o > >diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c >index 1a349f9..f068763 100644 >--- a/fs/gfs2/dir.c >+++ b/fs/gfs2/dir.c >@@ -1217,6 +1217,20 @@ static int compare_dents(const void *a, const void *b) > return ret; > } > >+static int gfs2_dirent_dot_or_dotdot(const struct gfs2_dirent *dent) >+{ >+ const char *name = (char *)(dent + 1); >+ >+ if (be16_to_cpu(dent->de_type) == DT_DIR) { >+ if (be16_to_cpu(dent->de_name_len) == 1 && name[0] == '.') >+ return 1; >+ if (be16_to_cpu(dent->de_name_len) == 2 && >+ strncmp(name, "..", 2) == 0) >+ return 1; >+ } >+ return 0; >+} >+ > /** > * do_filldir_main - read out directory entries > * @dip: The GFS2 inode >@@ -1262,8 +1276,12 @@ 
static int do_filldir_main(struct gfs2_inode *dip, struct dir_context *ctx, > ctx->pos = off; > > if (off_next == off) { >- if (*copied && !run) >+ if (*copied && !run) { >+ struct gfs2_dir_ra *d_ra = ctx->opaque; >+ if (d_ra) >+ set_bit(RA_FL_HASHCOLL, &d_ra->flags); > return 1; >+ } > run = 1; > } else > run = 0; >@@ -1273,11 +1291,18 @@ static int do_filldir_main(struct gfs2_inode *dip, struct dir_context *ctx, > ctx->pos = off; > } > >- if (!dir_emit(ctx, (const char *)(dent + 1), >- be16_to_cpu(dent->de_name_len), >- be64_to_cpu(dent->de_inum.no_addr), >- be16_to_cpu(dent->de_type))) >- return 1; >+ if (ctx->actor) { >+ if (!dir_emit(ctx, (const char *)(dent + 1), >+ be16_to_cpu(dent->de_name_len), >+ be64_to_cpu(dent->de_inum.no_addr), >+ be16_to_cpu(dent->de_type))) >+ return 1; >+ } else { /* we were called by dir_readahead */ >+ if (gfs2_dirent_dot_or_dotdot(dent)) >+ continue; >+ if (collect_inode_blocks(ctx, be64_to_cpu(dent->de_inum.no_addr))) >+ return 1; >+ } > > *copied = 1; > } >@@ -1311,8 +1336,7 @@ static void gfs2_free_sort_buffer(void *ptr) > } > > static int gfs2_dir_read_leaf(struct inode *inode, struct dir_context *ctx, >- int *copied, unsigned *depth, >- u64 leaf_no) >+ int *copied, unsigned *depth, u64 leaf_no) > { > struct gfs2_inode *ip = GFS2_I(inode); > struct gfs2_sbd *sdp = GFS2_SB(inode); >@@ -1399,14 +1423,14 @@ out: > } > > /** >- * gfs2_dir_readahead - Issue read-ahead requests for leaf blocks. >+ * gfs2_dir_leaf_ra - Issue read-ahead requests for leaf blocks. > * > * Note: we can't calculate each index like dir_e_read can because we don't > * have the leaf, and therefore we don't have the depth, and therefore we > * don't have the length. So we have to just read enough ahead to make up > * for the loss of information. 
> */ >-static void gfs2_dir_readahead(struct inode *inode, unsigned hsize, u32 index, >+static void gfs2_dir_leaf_ra(struct inode *inode, unsigned hsize, u32 index, > struct file_ra_state *f_ra) > { > struct gfs2_inode *ip = GFS2_I(inode); >@@ -1474,11 +1498,10 @@ static int dir_e_read(struct inode *inode, struct dir_context *ctx, > if (IS_ERR(lp)) > return PTR_ERR(lp); > >- gfs2_dir_readahead(inode, hsize, index, f_ra); >+ gfs2_dir_leaf_ra(inode, hsize, index, f_ra); > > while (index < hsize) { >- error = gfs2_dir_read_leaf(inode, ctx, >- &copied, &depth, >+ error = gfs2_dir_read_leaf(inode, ctx, &copied, &depth, > be64_to_cpu(lp[index])); > if (error) > break; >diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h >index 126c65d..429eea9 100644 >--- a/fs/gfs2/dir.h >+++ b/fs/gfs2/dir.h >@@ -25,6 +25,21 @@ struct gfs2_diradd { > struct buffer_head *bh; > }; > >+extern struct workqueue_struct *gfs2_dir_ra_wq; >+#define RA_MAX_INOS 131072 /*128K */ >+#define RA_FL_HASHCOLL 1 >+ >+struct gfs2_dir_ra { >+ u64 *inos; >+ size_t size; >+ size_t count; >+ unsigned int req; >+ unsigned long flags; >+}; >+ >+extern int gfs2_dir_readahead(struct file *file, struct dir_context *ctx, >+ unsigned int count); >+extern int collect_inode_blocks(struct dir_context *ctx, u64 offset); > extern struct inode *gfs2_dir_search(struct inode *dir, > const struct qstr *filename, > bool fail_on_exist); >diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c >index 6ab0cfb..ecf7441 100644 >--- a/fs/gfs2/file.c >+++ b/fs/gfs2/file.c >@@ -1077,6 +1077,7 @@ const struct file_operations gfs2_file_fops = { > > const struct file_operations gfs2_dir_fops = { > .iterate = gfs2_readdir, >+ .dir_readahead = gfs2_dir_readahead, > .unlocked_ioctl = gfs2_ioctl, > .open = gfs2_open, > .release = gfs2_release, >@@ -1107,6 +1108,7 @@ const struct file_operations gfs2_file_fops_nolock = { > > const struct file_operations gfs2_dir_fops_nolock = { > .iterate = gfs2_readdir, >+ .dir_readahead = gfs2_dir_readahead, > .unlocked_ioctl 
= gfs2_ioctl, > .open = gfs2_open, > .release = gfs2_release, >diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c >index 82b6ac8..71e8ce5 100644 >--- a/fs/gfs2/main.c >+++ b/fs/gfs2/main.c >@@ -161,9 +161,14 @@ static int __init init_gfs2_fs(void) > if (!gfs2_control_wq) > goto fail_recovery; > >+ gfs2_dir_ra_wq = alloc_workqueue("gfs2_dir_ra", >+ WQ_MEM_RECLAIM | WQ_FREEZABLE, 0); >+ if (!gfs2_dir_ra_wq) >+ goto fail_control; >+ > gfs2_page_pool = mempool_create_page_pool(64, 0); > if (!gfs2_page_pool) >- goto fail_control; >+ goto fail_ra; > > gfs2_register_debugfs(); > >@@ -171,6 +176,8 @@ static int __init init_gfs2_fs(void) > > return 0; > >+fail_ra: >+ destroy_workqueue(gfs2_dir_ra_wq); > fail_control: > destroy_workqueue(gfs2_control_wq); > fail_recovery: >@@ -224,6 +231,7 @@ static void __exit exit_gfs2_fs(void) > unregister_filesystem(&gfs2meta_fs_type); > destroy_workqueue(gfs_recovery_wq); > destroy_workqueue(gfs2_control_wq); >+ destroy_workqueue(gfs2_dir_ra_wq); > list_lru_destroy(&gfs2_qd_lru); > > rcu_barrier(); >diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c >index 1319b5c..2129501 100644 >--- a/fs/gfs2/super.c >+++ b/fs/gfs2/super.c >@@ -849,6 +849,7 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp) > kthread_stop(sdp->sd_quotad_process); > kthread_stop(sdp->sd_logd_process); > >+ flush_workqueue(gfs2_dir_ra_wq); > flush_workqueue(gfs2_delete_workqueue); > gfs2_quota_sync(sdp->sd_vfs, 0); > gfs2_statfs_sync(sdp->sd_vfs, 0); >diff --git a/fs/readdir.c b/fs/readdir.c >index 5b53d99..f65c3bf 100644 >--- a/fs/readdir.c >+++ b/fs/readdir.c >@@ -196,6 +196,7 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd, > struct linux_dirent __user * lastdirent; > struct getdents_callback buf = { > .ctx.actor = filldir, >+ .ctx.opaque = NULL, > .count = count, > .current_dir = dirent > }; >@@ -276,6 +277,7 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd, > struct linux_dirent64 __user * lastdirent; > struct getdents_callback64 buf = { > .ctx.actor = filldir64, >+ 
.ctx.opaque = NULL, > .count = count, > .current_dir = dirent > }; >@@ -302,3 +304,50 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd, > fdput(f); > return error; > } >+ >+SYSCALL_DEFINE3(dirreadahead, unsigned int, fd, >+ loff_t __user *, offset, unsigned int, count) >+{ >+ struct fd f; >+ struct inode *inode; >+ int error = -ENOTDIR; >+ loff_t off = 0; >+ struct dir_context ctx = {.actor = NULL, .opaque = NULL}; >+ >+ if (!count) >+ return -EINVAL; >+ >+ f = fdget(fd); >+ if (!f.file) >+ return -EBADF; >+ >+ inode = f.file->f_path.dentry->d_inode; >+ >+ error = -ENOTSUPP; >+ if (!f.file->f_op || !f.file->f_op->dir_readahead) >+ goto out; >+ >+ error = security_file_permission(f.file, MAY_READ); >+ if (error) >+ goto out; >+ >+ error = -EFAULT; >+ if (__get_user(ctx.pos, offset)) >+ goto out; >+ >+ error = mutex_lock_killable(&inode->i_mutex); >+ if (error) >+ goto out; >+ >+ error = -ENOENT; >+ if (!IS_DEADDIR(inode)) { >+ error = f.file->f_op->dir_readahead(f.file, &ctx, count); >+ if (__put_user(ctx.pos, offset)) >+ error = -EFAULT; >+ file_accessed(f.file); >+ } >+ mutex_unlock(&inode->i_mutex); >+out: >+ fdput(f); >+ return error; >+} >diff --git a/include/linux/fs.h b/include/linux/fs.h >index 7a9c5bc..60b9d98 100644 >--- a/include/linux/fs.h >+++ b/include/linux/fs.h >@@ -1435,9 +1435,11 @@ int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); > * to have different dirent layouts depending on the binary type. 
> */ > typedef int (*filldir_t)(void *, const char *, int, loff_t, u64, unsigned); >+ > struct dir_context { > const filldir_t actor; > loff_t pos; >+ void *opaque; > }; > > struct block_device_operations; >@@ -1456,6 +1458,7 @@ struct file_operations { > ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t); > ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t); > int (*iterate) (struct file *, struct dir_context *); >+ int (*dir_readahead) (struct file *, struct dir_context *, unsigned int); > unsigned int (*poll) (struct file *, struct poll_table_struct *); > long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); > long (*compat_ioctl) (struct file *, unsigned int, unsigned long); >diff --git a/fs/gfs2/dir_readahead.c b/fs/gfs2/dir_readahead.c >new file mode 100644 >index 0000000..c8f0781 >--- /dev/null >+++ b/fs/gfs2/dir_readahead.c >@@ -0,0 +1,213 @@ >+#include <linux/slab.h> >+#include <linux/spinlock.h> >+#include <linux/completion.h> >+#include <linux/buffer_head.h> >+#include <linux/pagemap.h> >+#include <linux/uio.h> >+#include <linux/blkdev.h> >+#include <linux/mm.h> >+#include <linux/mount.h> >+#include <linux/sort.h> >+#include <linux/fs.h> >+#include <linux/gfs2_ondisk.h> >+#include <linux/falloc.h> >+#include <linux/swap.h> >+#include <linux/crc32.h> >+#include <linux/writeback.h> >+#include <asm/uaccess.h> >+#include <linux/dlm.h> >+#include <linux/dlm_plock.h> >+ >+#include "gfs2.h" >+#include "incore.h" >+#include "bmap.h" >+#include "dir.h" >+#include "glock.h" >+#include "glops.h" >+#include "inode.h" >+#include "log.h" >+#include "meta_io.h" >+#include "quota.h" >+#include "rgrp.h" >+#include "trans.h" >+#include "util.h" >+ >+struct workqueue_struct *gfs2_dir_ra_wq; >+ >+static int compare_inos(const void *a, const void *b) >+{ >+ u64 ino_a, ino_b; >+ >+ ino_a = *(u64 *)a; >+ ino_b = *(u64 *)b; >+ >+ if (ino_a > ino_b) >+ return 1; >+ return -1; >+} >+ >+static int 
collect_more(struct gfs2_dir_ra *d_ra) >+{ >+ return (d_ra->count < d_ra->req && >+ (d_ra->count * sizeof(u64)) < d_ra->size); >+} >+ >+int collect_inode_blocks(struct dir_context *ctx, u64 ino) >+{ >+ struct gfs2_dir_ra *d_ra = (struct gfs2_dir_ra *) ctx->opaque; >+ >+ if (!collect_more(d_ra)) >+ return 1; /* Collected requested blocks */ >+ >+ d_ra->inos[d_ra->count++] = ino; >+ return 0; >+} >+ >+struct dir_ra_work { >+ struct work_struct work; >+ u64 ino; >+ struct gfs2_sbd *sdp; >+}; >+ >+static void dir_ra_work_func(struct work_struct *work) >+{ >+ struct dir_ra_work *w = container_of(work, struct dir_ra_work, work); >+ >+ /* XXX: What to do if sdp disappears by the time we get here? */ >+ struct inode *inode = gfs2_lookup_by_inum(w->sdp, w->ino, NULL, >+ GFS2_BLKST_DINODE); >+ if (IS_ERR(inode)) { >+ fs_err(w->sdp, "can't read in inode at addr:%llu: %ld\n", >+ w->ino, PTR_ERR(inode)); >+ } >+ gfs2_inode_refresh(GFS2_I(inode)); >+ iput(inode); >+ kfree(work); >+} >+ >+int gfs2_queue_dir_ra(struct dir_context *ctx, struct gfs2_sbd *sdp) >+{ >+ int i; >+ struct gfs2_dir_ra *d_ra = (struct gfs2_dir_ra *) ctx->opaque; >+ >+ sort(d_ra->inos, d_ra->count, sizeof(u64), compare_inos, NULL); >+ >+ for (i=0; i<d_ra->count; i++) { >+ struct dir_ra_work *w; >+ >+ w = kmalloc(sizeof(struct dir_ra_work), GFP_NOFS | __GFP_NOWARN); >+ if (!w) >+ break; >+ >+ w->ino = d_ra->inos[i]; >+ w->sdp = sdp; >+ INIT_WORK(&w->work, dir_ra_work_func); >+ queue_work(gfs2_dir_ra_wq, &w->work); >+ } >+ if (!i) >+ return -ENOMEM; >+ if (i != d_ra->count) >+ ctx->pos = 0; /* Don't know the resume offset for a short RA */ >+ return i; >+} >+ >+static inline unsigned int compute_ra_bufsize(unsigned int count) >+{ >+ unsigned int size = count * (sizeof(u64)); >+ >+ if (size > KMALLOC_MAX_SIZE) >+ return KMALLOC_MAX_SIZE; >+ if (size < KMALLOC_MIN_SIZE) >+ return KMALLOC_MIN_SIZE; >+ >+ return size; >+} >+ >+static int init_ra_context(struct gfs2_inode *ip, struct dir_context *ctx, >+ unsigned 
int count) >+{ >+ unsigned int bufsize; >+ struct gfs2_dir_ra *d_ra = (struct gfs2_dir_ra *) ctx->opaque; >+ >+ memset(d_ra, 0, sizeof(struct gfs2_dir_ra)); >+ count = count > RA_MAX_INOS ? RA_MAX_INOS : count; >+ count = count > ip->i_entries ? ip->i_entries : count; >+ >+ bufsize = compute_ra_bufsize(count); >+ d_ra->inos = kmalloc(bufsize, GFP_NOFS | __GFP_NOWARN); >+ if (!d_ra->inos) >+ return -ENOMEM; >+ >+ d_ra->size = bufsize; >+ d_ra->req = count; >+ >+ return 0; >+} >+ >+static void uninit_ra_context(struct dir_context *ctx) >+{ >+ struct gfs2_dir_ra *d_ra; >+ >+ if (!ctx || !ctx->opaque) >+ return; >+ d_ra = (struct gfs2_dir_ra *) ctx->opaque; >+ if (d_ra->inos) >+ kfree(d_ra->inos); >+ memset(d_ra, 0, sizeof(struct gfs2_dir_ra)); >+} >+/** >+ * gfs2_dir_readahead - GFS2's implementation of readdir readahead >+ * @file : The directory to be read from >+ * @ctx : Context contains buffer to collect inode numbers >+ * >+ * Readahead inode disk blocks (and extended attribute blocks if requested) >+ * of every directory entry >+ * >+ * Returns: +ve number: The number of entries for which readahead calls >+ * were issued >+ * -ve values: For error conditions >+ */ >+int gfs2_dir_readahead(struct file *file, struct dir_context *ctx, unsigned int count) >+{ >+ int error = -EINVAL; >+ struct inode *dir = file->f_mapping->host; >+ struct gfs2_inode *dip = GFS2_I(dir); >+ struct gfs2_holder d_gh; >+ struct gfs2_dir_ra d_ra; >+ >+ printk(KERN_WARNING "Start gfs2_rddir_ra syscall\n"); >+ >+ if (!ctx) >+ goto out; >+ >+ ctx->opaque = &d_ra; >+ error = init_ra_context(dip, ctx, count); >+ if (error) >+ goto out; >+ >+ gfs2_holder_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); >+ error = gfs2_glock_nq(&d_gh); >+ if (error) { >+ gfs2_holder_uninit(&d_gh); >+ goto uninit; >+ } >+ >+retry: >+ error = gfs2_dir_read(dir, ctx, &file->f_ra); >+ if (test_bit(RA_FL_HASHCOLL, &d_ra.flags)) { >+ clear_bit(RA_FL_HASHCOLL, &d_ra.flags); >+ goto retry; >+ } >+ printk(KERN_WARNING " 
Collected %u entries\n", (unsigned int) d_ra.count); >+ >+ /* Pass the collected inos to the workqueues to be read ahead */ >+ if (d_ra.count) >+ error = gfs2_queue_dir_ra(ctx, GFS2_SB(dir)); >+ >+ gfs2_glock_dq_uninit(&d_gh); >+uninit: >+ uninit_ra_context(ctx); >+out: >+ printk(KERN_WARNING "End gfs2_rddir_ra syscall, rv:%d\n", error); >+ return error; >+}
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 850426
:
605960
|
617073
|
625592
|
678841
|
680222
|
680282
|
708389
|
731269
|
731270
|
731272
|
731273
|
731274
|
731278
|
731279
|
732739
|
732740
|
732845
|
834840
|
853502
|
909528
|
909529
|
909534
|
912745
|
917238
|
917239
|
920895
|
920896
|
920897
|
920898
|
920899
|
920902
|
920903
|
947987