Login
[x]
Log in using an account from:
Fedora Account System
Red Hat Associate
Red Hat Customer
Or log in using a Red Hat Bugzilla account
Forgot Password
Login:
Hide Forgot
Create an Account
Red Hat Bugzilla – Attachment 316579 Details for
Bug 458684
GFS2: glock deadlock in page fault path
[?]
New
Simple Search
Advanced Search
My Links
Browse
Requests
Reports
Current State
Search
Tabular reports
Graphical reports
Duplicates
Other Reports
User Changes
Plotly Reports
Bug Status
Bug Severity
Non-Defaults
|
Product Dashboard
Help
Page Help!
Bug Writing Guidelines
What's new
Browser Support Policy
5.0.4.rh83 Release notes
FAQ
Guides index
User guide
Web Services
Contact
Legal
This site requires JavaScript to be enabled to function correctly, please enable it.
[patch]
Patch that was POSTed 11 Sep 2008
bz458684.patch (text/plain), 30.21 KB, created by
Robert Peterson
on 2008-09-12 14:20:13 UTC
(
hide
)
Description:
Patch that was POSTed 11 Sep 2008
Filename:
MIME Type:
Creator:
Robert Peterson
Created:
2008-09-12 14:20:13 UTC
Size:
30.21 KB
patch
obsolete
>diff -pur a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c >--- a/fs/gfs2/ops_address.c 2008-09-08 22:52:01.000000000 -0500 >+++ b/fs/gfs2/ops_address.c 2008-09-11 14:13:44.000000000 -0500 >@@ -36,6 +36,27 @@ > #include "util.h" > #include "glops.h" > >+static DEFINE_PER_CPU(struct pagevec, lru_add_pvecs) = { 0, }; >+ >+static void fastcall gfs2_lru_cache_add(struct page *page) >+{ >+ struct pagevec *pvec = &get_cpu_var(lru_add_pvecs); >+ >+ page_cache_get(page); >+ if (!pagevec_add(pvec, page)) >+ __pagevec_lru_add(pvec); >+ put_cpu_var(lru_add_pvecs); >+} >+ >+static int gfs2_add_to_page_cache_lru(struct page *page, >+ struct address_space *mapping, >+ pgoff_t offset, gfp_t gfp_mask) >+{ >+ int ret = add_to_page_cache(page, mapping, offset, gfp_mask); >+ if (ret == 0) >+ gfs2_lru_cache_add(page); >+ return ret; >+} > > static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page, > unsigned int from, unsigned int to) >@@ -664,6 +685,446 @@ out_uninit: > return ret; > } > >+/* >+ * Find or create a page at the given pagecache position. Return the locked >+ * page. This function is specifically for buffered writes. 
>+ */ >+static struct page *__grab_cache_page(struct address_space *mapping, >+ unsigned long index) >+{ >+ int status; >+ struct page *page; >+repeat: >+ page = find_lock_page(mapping, index); >+ if (likely(page)) >+ return page; >+ >+ page = page_cache_alloc(mapping); >+ if (!page) >+ return NULL; >+ status = gfs2_add_to_page_cache_lru(page, mapping, index, GFP_KERNEL); >+ if (unlikely(status)) { >+ page_cache_release(page); >+ if (status == -EEXIST) >+ goto repeat; >+ return NULL; >+ } >+ return page; >+} >+ >+/** >+ * gfs2_write_begin - Begin to write to a file >+ * @file: The file to write to >+ * @mapping: The mapping in which to write >+ * @pos: The file offset at which to start writing >+ * @len: Length of the write >+ * @flags: Various flags >+ * @pagep: Pointer to return the page >+ * @fsdata: Pointer to return fs data (unused by GFS2) >+ * >+ * Returns: errno >+ */ >+ >+int gfs2_write_begin(struct file *file, struct address_space *mapping, >+ loff_t pos, unsigned len, unsigned flags, >+ struct page **pagep, void **fsdata) >+{ >+ struct gfs2_inode *ip = GFS2_I(mapping->host); >+ struct gfs2_sbd *sdp = GFS2_SB(mapping->host); >+ unsigned int data_blocks, ind_blocks, rblocks; >+ int alloc_required; >+ int error = 0; >+ struct gfs2_alloc *al; >+ pgoff_t index = pos >> PAGE_CACHE_SHIFT; >+ unsigned from = pos & (PAGE_CACHE_SIZE - 1); >+ unsigned to = from + len; >+ struct page *page; >+ >+ gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME, &ip->i_gh); >+ error = gfs2_glock_nq_atime(&ip->i_gh); >+ if (unlikely(error)) >+ goto out_uninit; >+ >+ gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks); >+ error = gfs2_write_alloc_required(ip, pos, len, &alloc_required); >+ if (error) >+ goto out_unlock; >+ >+ if (alloc_required) { >+ al = gfs2_alloc_get(ip); >+ if (!al) { >+ error = -ENOMEM; >+ goto out_unlock; >+ } >+ >+ error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); >+ if (error) >+ goto out_alloc_put; >+ >+ error = gfs2_quota_check(ip, 
ip->i_inode.i_uid, ip->i_inode.i_gid); >+ if (error) >+ goto out_qunlock; >+ >+ al->al_requested = data_blocks + ind_blocks; >+ error = gfs2_inplace_reserve(ip); >+ if (error) >+ goto out_qunlock; >+ } >+ >+ rblocks = RES_DINODE + ind_blocks; >+ if (gfs2_is_jdata(ip)) >+ rblocks += data_blocks ? data_blocks : 1; >+ if (ind_blocks || data_blocks) >+ rblocks += RES_STATFS + RES_QUOTA; >+ >+ error = gfs2_trans_begin(sdp, rblocks, >+ PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize); >+ if (error) >+ goto out_trans_fail; >+ >+ error = -ENOMEM; >+ page = __grab_cache_page(mapping, index); >+ *pagep = page; >+ if (unlikely(!page)) >+ goto out_endtrans; >+ >+ if (gfs2_is_stuffed(ip)) { >+ error = 0; >+ if (pos + len > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) { >+ error = gfs2_unstuff_dinode(ip, page); >+ if (error == 0) >+ goto prepare_write; >+ } else if (!PageUptodate(page)) { >+ error = stuffed_readpage(ip, page); >+ } >+ goto out; >+ } >+ >+prepare_write: >+ error = block_prepare_write(page, from, to, gfs2_block_map); >+out: >+ if (error == 0) >+ return 0; >+ >+ page_cache_release(page); >+ if (pos + len > ip->i_inode.i_size) >+ vmtruncate(&ip->i_inode, ip->i_inode.i_size); >+out_endtrans: >+ gfs2_trans_end(sdp); >+out_trans_fail: >+ if (alloc_required) { >+ gfs2_inplace_release(ip); >+out_qunlock: >+ gfs2_quota_unlock(ip); >+out_alloc_put: >+ gfs2_alloc_put(ip); >+ } >+out_unlock: >+ gfs2_glock_dq(&ip->i_gh); >+out_uninit: >+ gfs2_holder_uninit(&ip->i_gh); >+ return error; >+} >+ >+/** >+ * adjust_fs_space - Adjusts the free space available due to gfs2_grow >+ * @inode: the rindex inode >+ */ >+static void adjust_fs_space(struct inode *inode) >+{ >+ struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; >+ struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; >+ struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; >+ u64 fs_total, new_free; >+ >+ /* Total up the file system space, according to the latest rindex. 
*/ >+ fs_total = gfs2_ri_total(sdp); >+ >+ spin_lock(&sdp->sd_statfs_spin); >+ if (fs_total > (m_sc->sc_total + l_sc->sc_total)) >+ new_free = fs_total - (m_sc->sc_total + l_sc->sc_total); >+ else >+ new_free = 0; >+ spin_unlock(&sdp->sd_statfs_spin); >+ fs_warn(sdp, "File system extended by %llu blocks.\n", >+ (unsigned long long)new_free); >+ gfs2_statfs_change(sdp, new_free, new_free, 0); >+} >+ >+/** >+ * gfs2_stuffed_write_end - Write end for stuffed files >+ * @inode: The inode >+ * @dibh: The buffer_head containing the on-disk inode >+ * @pos: The file position >+ * @len: The length of the write >+ * @copied: How much was actually copied by the VFS >+ * @page: The page >+ * >+ * This copies the data from the page into the inode block after >+ * the inode data structure itself. >+ * >+ * Returns: errno >+ */ >+static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh, >+ loff_t pos, unsigned len, unsigned copied, >+ struct page *page) >+{ >+ struct gfs2_inode *ip = GFS2_I(inode); >+ struct gfs2_sbd *sdp = GFS2_SB(inode); >+ u64 to = pos + copied; >+ void *kaddr; >+ unsigned char *buf = dibh->b_data + sizeof(struct gfs2_dinode); >+ struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data; >+ >+ BUG_ON((pos + len) > (dibh->b_size - sizeof(struct gfs2_dinode))); >+ kaddr = kmap_atomic(page, KM_USER0); >+ memcpy(buf + pos, kaddr + pos, copied); >+ memset(kaddr + pos + copied, 0, len - copied); >+ flush_dcache_page(page); >+ kunmap_atomic(kaddr, KM_USER0); >+ >+ if (!PageUptodate(page)) >+ SetPageUptodate(page); >+ unlock_page(page); >+ page_cache_release(page); >+ >+ if (inode->i_size < to) { >+ i_size_write(inode, to); >+ ip->i_di.di_size = inode->i_size; >+ di->di_size = cpu_to_be64(inode->i_size); >+ mark_inode_dirty(inode); >+ } >+ >+ if (inode == sdp->sd_rindex) >+ adjust_fs_space(inode); >+ >+ brelse(dibh); >+ gfs2_trans_end(sdp); >+ gfs2_glock_dq(&ip->i_gh); >+ gfs2_holder_uninit(&ip->i_gh); >+ return copied; >+} >+ >+/* >+ * If a 
page has any new buffers, zero them out here, and mark them uptodate >+ * and dirty so they'll be written out (in order to prevent uninitialised >+ * block data from leaking). And clear the new bit. >+ */ >+static void page_zero_new_buffers(struct page *page, unsigned from, unsigned to) >+{ >+ unsigned int block_start, block_end; >+ struct buffer_head *head, *bh; >+ >+ BUG_ON(!PageLocked(page)); >+ if (!page_has_buffers(page)) >+ return; >+ >+ bh = head = page_buffers(page); >+ block_start = 0; >+ do { >+ block_end = block_start + bh->b_size; >+ >+ if (buffer_new(bh)) { >+ if (block_end > from && block_start < to) { >+ if (!PageUptodate(page)) { >+ unsigned start, size; >+ >+ start = max(from, block_start); >+ size = min(to, block_end) - start; >+ >+ zero_user(page, start, size); >+ set_buffer_uptodate(bh); >+ } >+ >+ clear_buffer_new(bh); >+ mark_buffer_dirty(bh); >+ } >+ } >+ >+ block_start = block_end; >+ bh = bh->b_this_page; >+ } while (bh != head); >+} >+ >+static int __block_commit_write(struct inode *inode, struct page *page, >+ unsigned from, unsigned to) >+{ >+ unsigned block_start, block_end; >+ int partial = 0; >+ unsigned blocksize; >+ struct buffer_head *bh, *head; >+ >+ blocksize = 1 << inode->i_blkbits; >+ >+ for(bh = head = page_buffers(page), block_start = 0; >+ bh != head || !block_start; >+ block_start=block_end, bh = bh->b_this_page) { >+ block_end = block_start + blocksize; >+ if (block_end <= from || block_start >= to) { >+ if (!buffer_uptodate(bh)) >+ partial = 1; >+ } else { >+ set_buffer_uptodate(bh); >+ mark_buffer_dirty(bh); >+ } >+ clear_buffer_new(bh); >+ } >+ >+ /* >+ * If this is a partial write which happened to make all buffers >+ * uptodate then we can optimize away a bogus readpage() for >+ * the next read(). Here we 'discover' whether the page went >+ * uptodate as a result of this (potentially partial) write. 
>+ */ >+ if (!partial) >+ SetPageUptodate(page); >+ return 0; >+} >+ >+static int block_write_end(struct file *file, struct address_space *mapping, >+ loff_t pos, unsigned len, unsigned copied, >+ struct page *page, void *fsdata) >+{ >+ struct inode *inode = mapping->host; >+ unsigned start; >+ >+ start = pos & (PAGE_CACHE_SIZE - 1); >+ >+ if (unlikely(copied < len)) { >+ /* >+ * The buffers that were written will now be uptodate, so we >+ * don't have to worry about a readpage reading them and >+ * overwriting a partial write. However if we have encountered >+ * a short write and only partially written into a buffer, it >+ * will not be marked uptodate, so a readpage might come in and >+ * destroy our partial write. >+ * >+ * Do the simplest thing, and just treat any short write to a >+ * non uptodate page as a zero-length write, and force the >+ * caller to redo the whole thing. >+ */ >+ if (!PageUptodate(page)) >+ copied = 0; >+ >+ page_zero_new_buffers(page, start+copied, start+len); >+ } >+ flush_dcache_page(page); >+ >+ /* This could be a short (even 0-length) commit */ >+ __block_commit_write(inode, page, start, start+copied); >+ >+ return copied; >+} >+ >+static int generic_write_end(struct file *file, struct address_space *mapping, >+ loff_t pos, unsigned len, unsigned copied, >+ struct page *page, void *fsdata) >+{ >+ struct inode *inode = mapping->host; >+ int i_size_changed = 0; >+ >+ copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); >+ >+ /* >+ * No need to use i_size_read() here, the i_size >+ * cannot change under us because we hold i_mutex. >+ * >+ * But it's important to update i_size while still holding page lock: >+ * page writeout could otherwise come in and zero beyond i_size. >+ */ >+ if (pos+copied > inode->i_size) { >+ i_size_write(inode, pos+copied); >+ i_size_changed = 1; >+ } >+ >+ unlock_page(page); >+ page_cache_release(page); >+ >+ /* >+ * Don't mark the inode dirty under page lock. 
First, it unnecessarily >+ * makes the holding time of page lock longer. Second, it forces lock >+ * ordering of page lock and transaction start for journaling >+ * filesystems. >+ */ >+ if (i_size_changed) >+ mark_inode_dirty(inode); >+ >+ return copied; >+} >+ >+/** >+ * gfs2_write_end >+ * @file: The file to write to >+ * @mapping: The address space to write to >+ * @pos: The file position >+ * @len: The length of the data >+ * @copied: >+ * @page: The page that has been written >+ * @fsdata: The fsdata (unused in GFS2) >+ * >+ * The main write_end function for GFS2. We have a separate one for >+ * stuffed files as they are slightly different, otherwise we just >+ * put our locking around the VFS provided functions. >+ * >+ * Returns: errno >+ */ >+ >+int gfs2_write_end(struct file *file, struct address_space *mapping, >+ loff_t pos, unsigned len, unsigned copied, >+ struct page *page, void *fsdata) >+{ >+ struct inode *inode = page->mapping->host; >+ struct gfs2_inode *ip = GFS2_I(inode); >+ struct gfs2_sbd *sdp = GFS2_SB(inode); >+ struct buffer_head *dibh; >+ struct gfs2_alloc *al = ip->i_alloc; >+ struct gfs2_dinode *di; >+ unsigned int from = pos & (PAGE_CACHE_SIZE - 1); >+ unsigned int to = from + len; >+ int ret; >+ >+ BUG_ON(gfs2_glock_is_locked_by_me(ip->i_gl) == NULL); >+ >+ ret = gfs2_meta_inode_buffer(ip, &dibh); >+ if (unlikely(ret)) { >+ unlock_page(page); >+ page_cache_release(page); >+ goto failed; >+ } >+ >+ gfs2_trans_add_bh(ip->i_gl, dibh, 1); >+ >+ if (gfs2_is_stuffed(ip)) >+ return gfs2_stuffed_write_end(inode, dibh, pos, len, copied, page); >+ >+ if (!gfs2_is_writeback(ip)) >+ gfs2_page_add_databufs(ip, page, from, to); >+ >+ ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); >+ >+ if (likely(ret >= 0) && (inode->i_size > ip->i_di.di_size)) { >+ di = (struct gfs2_dinode *)dibh->b_data; >+ ip->i_di.di_size = inode->i_size; >+ di->di_size = cpu_to_be64(inode->i_size); >+ mark_inode_dirty(inode); >+ } >+ >+ if (inode == 
sdp->sd_rindex) >+ adjust_fs_space(inode); >+ >+ brelse(dibh); >+ gfs2_trans_end(sdp); >+failed: >+ if (al) { >+ gfs2_inplace_release(ip); >+ gfs2_quota_unlock(ip); >+ gfs2_alloc_put(ip); >+ } >+ gfs2_glock_dq(&ip->i_gh); >+ gfs2_holder_uninit(&ip->i_gh); >+ return ret; >+} >+ > /** > * gfs2_prepare_write - Prepare to write a page to a file > * @file: The file to write to >@@ -719,31 +1180,6 @@ out: > } > > /** >- * adjust_fs_space - Adjusts the free space available due to gfs2_grow >- * @inode: the rindex inode >- */ >-static void adjust_fs_space(struct inode *inode) >-{ >- struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; >- struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; >- struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; >- u64 fs_total, new_free; >- >- /* Total up the file system space, according to the latest rindex. */ >- fs_total = gfs2_ri_total(sdp); >- >- spin_lock(&sdp->sd_statfs_spin); >- if (fs_total > (m_sc->sc_total + l_sc->sc_total)) >- new_free = fs_total - (m_sc->sc_total + l_sc->sc_total); >- else >- new_free = 0; >- spin_unlock(&sdp->sd_statfs_spin); >- fs_warn(sdp, "File system extended by %llu blocks.\n", >- (unsigned long long)new_free); >- gfs2_statfs_change(sdp, new_free, new_free, 0); >-} >- >-/** > * gfs2_commit_write - Commit write to a file > * @file: The file to write to > * @page: The page containing the data >@@ -943,7 +1379,7 @@ static int gfs2_ok_for_dio(struct gfs2_i > if (gfs2_is_stuffed(ip)) > return 0; > >- if (offset > i_size_read(&ip->i_inode)) >+ if (offset >= i_size_read(&ip->i_inode)) > return 0; > return 1; > } >diff -pur a/fs/gfs2/ops_address.h b/fs/gfs2/ops_address.h >--- a/fs/gfs2/ops_address.h 2008-09-08 22:51:52.000000000 -0500 >+++ b/fs/gfs2/ops_address.h 2008-09-11 13:44:41.000000000 -0500 >@@ -16,5 +16,11 @@ > > extern int gfs2_releasepage(struct page *page, gfp_t gfp_mask); > extern void gfs2_set_aops(struct inode *inode); >+extern int gfs2_write_begin(struct file *file, struct 
address_space *mapping, >+ loff_t pos, unsigned len, unsigned flags, >+ struct page **pagep, void **fsdata); >+extern int gfs2_write_end(struct file *file, struct address_space *mapping, >+ loff_t pos, unsigned len, unsigned copied, >+ struct page *page, void *fsdata); > > #endif /* __OPS_ADDRESS_DOT_H__ */ >diff -pur a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c >--- a/fs/gfs2/ops_file.c 2008-09-08 22:52:01.000000000 -0500 >+++ b/fs/gfs2/ops_file.c 2008-09-11 15:02:33.000000000 -0500 >@@ -23,7 +23,9 @@ > #include <linux/crc32.h> > #include <linux/lm_interface.h> > #include <linux/writeback.h> >+#include <linux/uaccess.h> > #include <asm/uaccess.h> >+#include <linux/mpage.h> > > #include "gfs2.h" > #include "incore.h" >@@ -44,6 +46,31 @@ > #include "eaops.h" > #include "ops_address.h" > >+struct iov_iter { >+ const struct iovec *iov; >+ unsigned long nr_segs; >+ size_t iov_offset; >+ size_t count; >+}; >+ >+static void iov_iter_advance(struct iov_iter *i, size_t bytes); >+static inline void iov_iter_init(struct iov_iter *i, >+ const struct iovec *iov, unsigned long nr_segs, >+ size_t count, size_t written) >+{ >+ i->iov = iov; >+ i->nr_segs = nr_segs; >+ i->iov_offset = 0; >+ i->count = count + written; >+ >+ iov_iter_advance(i, written); >+} >+ >+static inline size_t iov_iter_count(struct iov_iter *i) >+{ >+ return i->count; >+} >+ > /* > * Most fields left uninitialised to catch anybody who tries to > * use them. 
f_flags set to prevent file_accessed() from touching >@@ -633,14 +660,515 @@ static int gfs2_flock(struct file *file, > return do_flock(file, cmd, fl); > } > >+/** >+ * generic_file functions backported from upstream filemap.c: >+ * In many cases I needed to change generic_* to gfs2_* >+ */ >+ >+static unsigned long iov_iter_single_seg_count(struct iov_iter *i) >+{ >+ const struct iovec *iov = i->iov; >+ if (i->nr_segs == 1) >+ return i->count; >+ else >+ return min(i->count, iov->iov_len - i->iov_offset); >+} >+ >+static size_t __iovec_copy_from_user_inatomic(char *vaddr, >+ const struct iovec *iov, size_t base, size_t bytes) >+{ >+ size_t copied = 0, left = 0; >+ >+ while (bytes) { >+ char __user *buf = iov->iov_base + base; >+ int copy = min(bytes, iov->iov_len - base); >+ >+ base = 0; >+ left = __copy_from_user_inatomic_nocache(vaddr, buf, copy); >+ copied += copy; >+ bytes -= copy; >+ vaddr += copy; >+ iov++; >+ >+ if (unlikely(left)) >+ break; >+ } >+ return copied - left; >+} >+ >+/* >+ * Copy as much as we can into the page and return the number of bytes which >+ * were sucessfully copied. If a fault is encountered then return the number of >+ * bytes which were copied. 
>+ */ >+static size_t iov_iter_copy_from_user_atomic(struct page *page, >+ struct iov_iter *i, unsigned long offset, size_t bytes) >+{ >+ char *kaddr; >+ size_t copied; >+ >+ kaddr = kmap_atomic(page, KM_USER0); >+ if (likely(i->nr_segs == 1)) { >+ int left; >+ char __user *buf = i->iov->iov_base + i->iov_offset; >+ left = __copy_from_user_inatomic_nocache(kaddr + offset, >+ buf, bytes); >+ copied = bytes - left; >+ } else { >+ copied = __iovec_copy_from_user_inatomic(kaddr + offset, >+ i->iov, i->iov_offset, bytes); >+ } >+ kunmap_atomic(kaddr, KM_USER0); >+ >+ return copied; >+} >+ >+static void iov_iter_advance(struct iov_iter *i, size_t bytes) >+{ >+ BUG_ON(i->count < bytes); >+ >+ if (likely(i->nr_segs == 1)) { >+ i->iov_offset += bytes; >+ i->count -= bytes; >+ } else { >+ const struct iovec *iov = i->iov; >+ size_t base = i->iov_offset; >+ >+ /* >+ * The !iov->iov_len check ensures we skip over unlikely >+ * zero-length segments (without overruning the iovec). >+ */ >+ while (bytes || unlikely(i->count && !iov->iov_len)) { >+ int copy; >+ >+ copy = min(bytes, iov->iov_len - base); >+ BUG_ON(!i->count || i->count < copy); >+ i->count -= copy; >+ bytes -= copy; >+ base += copy; >+ if (iov->iov_len == base) { >+ iov++; >+ base = 0; >+ } >+ } >+ i->iov = iov; >+ i->iov_offset = base; >+ } >+} >+ >+static inline int gfs2_fault_in_pages_readable(const char __user *uaddr, >+ int size) >+{ >+ volatile char c; >+ int ret; >+ >+ if (unlikely(size == 0)) >+ return 0; >+ >+ ret = __get_user(c, uaddr); >+ if (ret == 0) { >+ const char __user *end = uaddr + size - 1; >+ >+ if (((unsigned long)uaddr & PAGE_MASK) != >+ ((unsigned long)end & PAGE_MASK)) >+ ret = __get_user(c, end); >+ } >+ return ret; >+} >+static int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes) >+{ >+ char __user *buf = i->iov->iov_base + i->iov_offset; >+ bytes = min(bytes, i->iov->iov_len - i->iov_offset); >+ return gfs2_fault_in_pages_readable(buf, bytes); >+} >+ >+static ssize_t 
gfs2_perform_write(struct file *file, >+ struct iov_iter *i, loff_t pos) >+{ >+ struct address_space *mapping = file->f_mapping; >+ long status = 0; >+ ssize_t written = 0; >+ unsigned int flags = 0; >+ >+ /* >+ * Copies from kernel address space cannot fail (NFSD is a big user). >+ */ >+ /*if (segment_eq(get_fs(), KERNEL_DS)) >+ flags |= AOP_FLAG_UNINTERRUPTIBLE;*/ >+ >+ do { >+ struct page *page; >+ pgoff_t index; /* Pagecache index for current page */ >+ unsigned long offset; /* Offset into pagecache page */ >+ unsigned long bytes; /* Bytes to write to page */ >+ size_t copied; /* Bytes copied from user */ >+ void *fsdata; >+ >+ offset = (pos & (PAGE_CACHE_SIZE - 1)); >+ index = pos >> PAGE_CACHE_SHIFT; >+ bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset, >+ iov_iter_count(i)); >+ >+again: >+ >+ /* >+ * Bring in the user page that we will copy from _first_. >+ * Otherwise there's a nasty deadlock on copying from the >+ * same page as we're writing to, without it being marked >+ * up-to-date. >+ * >+ * Not only is this an optimisation, but it is also required >+ * to check that the address is actually valid, when atomic >+ * usercopies are used, below. 
>+ */ >+ if (unlikely(iov_iter_fault_in_readable(i, bytes))) { >+ status = -EFAULT; >+ break; >+ } >+ >+ status = gfs2_write_begin(file, mapping, pos, bytes, flags, >+ &page, &fsdata); >+ if (unlikely(status)) >+ break; >+ >+ /* pagefault disable */ >+ inc_preempt_count(); >+ barrier(); >+ copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes); >+ /* pagefault enable */ >+ barrier(); >+ dec_preempt_count(); >+ barrier(); >+ preempt_check_resched(); >+ >+ flush_dcache_page(page); >+ >+ status = gfs2_write_end(file, mapping, pos, bytes, copied, >+ page, fsdata); >+ if (unlikely(status < 0)) >+ break; >+ copied = status; >+ >+ cond_resched(); >+ >+ iov_iter_advance(i, copied); >+ if (unlikely(copied == 0)) { >+ /* >+ * If we were unable to copy any data at all, we must >+ * fall back to a single segment length write. >+ * >+ * If we didn't fallback here, we could livelock >+ * because not all segments in the iov can be copied at >+ * once without a pagefault. >+ */ >+ bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset, >+ iov_iter_single_seg_count(i)); >+ goto again; >+ } >+ pos += copied; >+ written += copied; >+ >+ balance_dirty_pages_ratelimited(mapping); >+ >+ } while (iov_iter_count(i)); >+ >+ return written ? 
written : status; >+} >+ >+static ssize_t >+gfs2_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, >+ unsigned long nr_segs, loff_t pos, loff_t *ppos, >+ size_t count, ssize_t written) >+{ >+ struct file *file = iocb->ki_filp; >+ struct address_space *mapping = file->f_mapping; >+ const struct address_space_operations *a_ops = mapping->a_ops; >+ struct inode *inode = mapping->host; >+ ssize_t status; >+ struct iov_iter i; >+ >+ iov_iter_init(&i, iov, nr_segs, count, written); >+ status = gfs2_perform_write(file, &i, pos); >+ >+ if (likely(status >= 0)) { >+ written += status; >+ *ppos = pos + status; >+ >+ /* >+ * For now, when the user asks for O_SYNC, we'll actually give >+ * O_DSYNC >+ */ >+ if (unlikely((file->f_flags & O_SYNC) || IS_SYNC(inode))) { >+ if (!a_ops->writepage || !is_sync_kiocb(iocb)) >+ status = generic_osync_inode(inode, mapping, >+ OSYNC_METADATA|OSYNC_DATA); >+ } >+ } >+ >+ /* >+ * If we get here for O_DIRECT writes then we must have fallen through >+ * to buffered writes (block instantiation inside i_size). So we sync >+ * the file data here, to try to honour O_DIRECT expectations. >+ */ >+ if (unlikely(file->f_flags & O_DIRECT) && written) >+ status = filemap_write_and_wait(mapping); >+ >+ return written ? written : status; >+} >+ >+/* >+ * Performs necessary checks before doing a write >+ * @iov: io vector request >+ * @nr_segs: number of segments in the iovec >+ * @count: number of bytes to write >+ * @access_flags: type of access: %VERIFY_READ or %VERIFY_WRITE >+ * >+ * Adjust number of segments and amount of bytes to write (nr_segs should be >+ * properly initialized first). Returns appropriate error code that caller >+ * should return or zero in case that write should be allowed. 
>+ */ >+static int generic_segment_checks(const struct iovec *iov, >+ unsigned long *nr_segs, size_t *count, int access_flags) >+{ >+ unsigned long seg; >+ size_t cnt = 0; >+ for (seg = 0; seg < *nr_segs; seg++) { >+ const struct iovec *iv = &iov[seg]; >+ >+ /* >+ * If any segment has a negative length, or the cumulative >+ * length ever wraps negative then return -EINVAL. >+ */ >+ cnt += iv->iov_len; >+ if (unlikely((ssize_t)(cnt|iv->iov_len) < 0)) >+ return -EINVAL; >+ if (access_ok(access_flags, iv->iov_base, iv->iov_len)) >+ continue; >+ if (seg == 0) >+ return -EFAULT; >+ *nr_segs = seg; >+ cnt -= iv->iov_len; /* This segment is no good */ >+ break; >+ } >+ *count = cnt; >+ return 0; >+} >+ >+static ssize_t >+__gfs2_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov, >+ unsigned long nr_segs, loff_t *ppos) >+{ >+ struct file *file = iocb->ki_filp; >+ struct address_space * mapping = file->f_mapping; >+ size_t ocount; /* original count */ >+ size_t count; /* after file limit checks */ >+ struct inode *inode = mapping->host; >+ loff_t pos; >+ ssize_t written; >+ ssize_t err; >+ >+ ocount = 0; >+ err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); >+ if (err) >+ return err; >+ >+ count = ocount; >+ pos = *ppos; >+ >+ vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); >+ >+ /* We can write back this queue in page reclaim */ >+ current->backing_dev_info = mapping->backing_dev_info; >+ written = 0; >+ >+ err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); >+ if (err) >+ goto out; >+ >+ if (count == 0) >+ goto out; >+ >+ err = remove_suid(file->f_dentry); >+ if (err) >+ goto out; >+ >+ file_update_time(file); >+ >+ /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ >+ if (unlikely(file->f_flags & O_DIRECT)) { >+ loff_t endbyte; >+ ssize_t written_buffered; >+ >+ written = generic_file_direct_write(iocb, iov, &nr_segs, pos, >+ ppos, count, ocount); >+ if (written < 0 || written == count) >+ goto out; >+ /* >+ * 
direct-io write to a hole: fall through to buffered I/O >+ * for completing the rest of the request. >+ */ >+ pos += written; >+ count -= written; >+ written_buffered = gfs2_file_buffered_write(iocb, iov, >+ nr_segs, pos, >+ ppos, count, >+ written); >+ /* >+ * If gfs2_file_buffered_write() returned a synchronous error >+ * then we want to return the number of bytes which were >+ * direct-written, or the error code if that was zero. Note >+ * that this differs from normal direct-io semantics, which >+ * will return -EFOO even if some bytes were written. >+ */ >+ if (written_buffered < 0) { >+ err = written_buffered; >+ goto out; >+ } >+ >+ /* >+ * We need to ensure that the page cache pages are written to >+ * disk and invalidated to preserve the expected O_DIRECT >+ * semantics. >+ */ >+ endbyte = pos + written_buffered - written - 1; >+ err = do_sync_file_range(file, pos, endbyte, >+ SYNC_FILE_RANGE_WAIT_BEFORE| >+ SYNC_FILE_RANGE_WRITE| >+ SYNC_FILE_RANGE_WAIT_AFTER); >+ if (err == 0) { >+ written = written_buffered; >+ invalidate_mapping_pages(mapping, >+ pos >> PAGE_CACHE_SHIFT, >+ endbyte >> PAGE_CACHE_SHIFT); >+ } else { >+ /* >+ * We don't know how much we wrote, so just return >+ * the number of bytes which were direct-written >+ */ >+ } >+ } else { >+ written = gfs2_file_buffered_write(iocb, iov, nr_segs, >+ pos, ppos, count, written); >+ } >+out: >+ current->backing_dev_info = NULL; >+ return written ? 
written : err; >+} >+ >+static ssize_t >+gfs2_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov, >+ unsigned long nr_segs, loff_t *ppos) >+{ >+ struct file *file = iocb->ki_filp; >+ struct address_space *mapping = file->f_mapping; >+ struct inode *inode = mapping->host; >+ ssize_t ret; >+ loff_t pos = *ppos; >+ >+ ret = __gfs2_file_aio_write_nolock(iocb, iov, nr_segs, ppos); >+ >+ if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { >+ int err; >+ >+ err = sync_page_range_nolock(inode, mapping, pos, ret); >+ if (err < 0) >+ ret = err; >+ } >+ return ret; >+} >+ >+static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const char __user *buf, >+ size_t count, loff_t pos) >+{ >+ struct file *file = iocb->ki_filp; >+ struct address_space *mapping = file->f_mapping; >+ struct inode *inode = mapping->host; >+ ssize_t ret; >+ struct iovec local_iov = { .iov_base = (void __user *)buf, >+ .iov_len = count }; >+ >+ BUG_ON(iocb->ki_pos != pos); >+ >+ mutex_lock(&inode->i_mutex); >+ ret = __gfs2_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos); >+ mutex_unlock(&inode->i_mutex); >+ >+ if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { >+ ssize_t err; >+ >+ err = sync_page_range(inode, mapping, pos, ret); >+ if (err < 0) >+ ret = err; >+ } >+ return ret; >+} >+ >+static ssize_t gfs2_file_write_nolock(struct file *file, >+ const struct iovec *iov, >+ unsigned long nr_segs, loff_t *ppos) >+{ >+ struct kiocb kiocb; >+ ssize_t ret; >+ >+ init_sync_kiocb(&kiocb, file); >+ ret = gfs2_file_aio_write_nolock(&kiocb, iov, nr_segs, ppos); >+ if (-EIOCBQUEUED == ret) >+ ret = wait_on_sync_kiocb(&kiocb); >+ return ret; >+} >+ >+static ssize_t gfs2_file_write(struct file *file, const char __user *buf, >+ size_t count, loff_t *ppos) >+{ >+ struct address_space *mapping = file->f_mapping; >+ struct inode *inode = mapping->host; >+ ssize_t ret; >+ struct iovec local_iov = { .iov_base = (void __user *)buf, >+ .iov_len = count }; >+ >+ 
mutex_lock(&inode->i_mutex); >+ ret = gfs2_file_write_nolock(file, &local_iov, 1, ppos); >+ mutex_unlock(&inode->i_mutex); >+ >+ if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { >+ ssize_t err; >+ >+ err = sync_page_range(inode, mapping, *ppos - ret, ret); >+ if (err < 0) >+ ret = err; >+ } >+ return ret; >+} >+ >+static ssize_t gfs2_file_writev(struct file *file, const struct iovec *iov, >+ unsigned long nr_segs, loff_t *ppos) >+{ >+ struct address_space *mapping = file->f_mapping; >+ struct inode *inode = mapping->host; >+ ssize_t ret; >+ >+ mutex_lock(&inode->i_mutex); >+ ret = gfs2_file_write_nolock(file, iov, nr_segs, ppos); >+ mutex_unlock(&inode->i_mutex); >+ >+ if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { >+ int err; >+ >+ err = sync_page_range(inode, mapping, *ppos - ret, ret); >+ if (err < 0) >+ ret = err; >+ } >+ return ret; >+} >+ > const struct file_operations gfs2_file_fops = { > .llseek = gfs2_llseek, > .read = generic_file_read, > .readv = generic_file_readv, > .aio_read = generic_file_aio_read, >- .write = generic_file_write, >- .writev = generic_file_writev, >- .aio_write = generic_file_aio_write, >+ .write = gfs2_file_write, >+ .writev = gfs2_file_writev, >+ .aio_write = gfs2_file_aio_write, > .unlocked_ioctl = gfs2_ioctl, > .mmap = gfs2_mmap, > .open = gfs2_open, >@@ -668,9 +1196,9 @@ const struct file_operations gfs2_file_f > .read = generic_file_read, > .readv = generic_file_readv, > .aio_read = generic_file_aio_read, >- .write = generic_file_write, >- .writev = generic_file_writev, >- .aio_write = generic_file_aio_write, >+ .write = gfs2_file_write, >+ .writev = gfs2_file_writev, >+ .aio_write = gfs2_file_aio_write, > .unlocked_ioctl = gfs2_ioctl, > .mmap = gfs2_mmap, > .open = gfs2_open,
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 458684
:
314028
|
314549
|
314680
|
314745
|
314760
|
314761
|
314762
|
314981
|
315157
|
315399
|
315400
|
315660
|
316226
|
316480
|
316579
|
316802