From: Tristan Ye Subject: Re: [PATCH] ocfs2: Implement llseek() Date: Thu, 19 May 2011 17:13:48 +0800 Message-ID: <4DD4DF4C.501@oracle.com> References: <1305773084-19296-1-git-send-email-sunil.mushran@oracle.com> <1305773084-19296-3-git-send-email-sunil.mushran@oracle.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Cc: linux-kernel@vger.kernel.org, ocfs2-devel@oss.oracle.com, josef@redhat.com, linux-fsdevel@vger.kernel.org, linux-ext4@vger.kernel.org, linux-btrfs@vger.kernel.org, viro@ZenIV.linux.org.uk To: Sunil Mushran Return-path: In-Reply-To: <1305773084-19296-3-git-send-email-sunil.mushran@oracle.com> List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Sender: ocfs2-devel-bounces@oss.oracle.com Errors-To: ocfs2-devel-bounces@oss.oracle.com List-Id: linux-ext4.vger.kernel.org Sunil Mushran wrote: > ocfs2 implements its own llseek() to provide the SEEK_HOLE/SEEK_DATA > functionality. > > SEEK_HOLE sets the file pointer to the start of either a hole or an unwritten > (preallocated) extent, that is greater than or equal to the supplied offset. > > SEEK_DATA sets the file pointer to the start of an allocated extent (not > unwritten) that is greater than or equal to the supplied offset. > > If the supplied offset is on a desired region, then the file pointer is set > to it. Offsets greater than or equal to the file size return -ENXIO. > > Unwritten (preallocated) extents are considered holes because the file system > treats reads to such regions in the same way as it does to holes. > > Signed-off-by: Sunil Mushran > --- > fs/ocfs2/extent_map.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++++ > fs/ocfs2/extent_map.h | 2 + > fs/ocfs2/file.c | 53 ++++++++++++++++++++++++++- > 3 files changed, 150 insertions(+), 2 deletions(-) > > diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c > index 23457b4..6942c21 100644 > --- a/fs/ocfs2/extent_map.c > +++ b/fs/ocfs2/extent_map.c > @@ -832,6 +832,103 @@ out: > return ret; > } > > +int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int origin) > +{ > + struct inode *inode = file->f_mapping->host; > + int ret; > + unsigned int is_last = 0, is_data = 0; > + u16 cs_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits; > + u32 cpos, cend, clen, hole_size; > + u64 extoff, extlen; > + struct buffer_head *di_bh = NULL; > + struct ocfs2_extent_rec rec; > + > + BUG_ON(origin != SEEK_DATA && origin != SEEK_HOLE); > + > + ret = ocfs2_inode_lock(inode, &di_bh, 0); > + if (ret) { > + mlog_errno(ret); > + goto out; > + } > + > + down_read(&OCFS2_I(inode)->ip_alloc_sem); > + > + if (inode->i_size == 0 || *offset >= inode->i_size) { > + ret = -ENXIO; > + goto out_unlock; > + } Why not using if (*offset >= inode->i_size) directly? > + > + if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { > + if (origin == SEEK_HOLE) > + *offset = inode->i_size; > + goto out_unlock; > + } > + > + clen = 0; > + cpos = *offset >> cs_bits; > + cend = ocfs2_clusters_for_bytes(inode->i_sb, inode->i_size); > + > + while (cpos < cend && !is_last) { > + ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos, &hole_size, > + &rec, &is_last); > + if (ret) { > + mlog_errno(ret); > + goto out_unlock; > + } > + > + extoff = cpos; > + extoff <<= cs_bits; > + > + if (rec.e_blkno == 0ULL) { > + clen = hole_size; > + is_data = 0; > + } else { > + BUG_ON(cpos < le32_to_cpu(rec.e_cpos)); A same assert has already been performed inside ocfs2_get_clusters_nocache(), does it make sense to do it again here? > + clen = le16_to_cpu(rec.e_leaf_clusters) - > + (cpos - le32_to_cpu(rec.e_cpos)); > + is_data = (rec.e_flags & OCFS2_EXT_UNWRITTEN) ? 0 : 1; > + } > + > + if ((!is_data && origin == SEEK_HOLE) || > + (is_data && origin == SEEK_DATA)) { > + if (extoff > *offset) > + *offset = extoff; > + goto out_unlock; Seems above logic is going to stop at the first time we find a hole. How about the offset was within the range of a hole already when we doing SEEK_HOLE, shouldn't we proceed detecting until the next hole gets found, whose start_offset was greater than supplied offset, according to semantics described by the the header of this patch, should it be like following? if (extoff > *offset) { *offset = extoff; goto out_unlock; } > + } > + > + if (!is_last) > + cpos += clen; > + } > + > + if (origin == SEEK_HOLE) { > + extoff = cpos; > + extoff <<= cs_bits; extoff already has been assigned properly above in while loop? > + extlen = clen; > + extlen <<= cs_bits; > + > + if ((extoff + extlen) > inode->i_size) > + extlen = inode->i_size - extoff; > + extoff += extlen; > + if (extoff > *offset) > + *offset = extoff; > + goto out_unlock; > + } > + > + ret = -ENXIO; > + > +out_unlock: > + > + brelse(di_bh); > + > + up_read(&OCFS2_I(inode)->ip_alloc_sem); > + > + ocfs2_inode_unlock(inode, 0); > +out: > + if (ret && ret != -ENXIO) > + ret = -ENXIO; > + return ret; > +} > + > int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr, > struct buffer_head *bhs[], int flags, > int (*validate)(struct super_block *sb, > diff --git a/fs/ocfs2/extent_map.h b/fs/ocfs2/extent_map.h > index e79d41c..67ea57d 100644 > --- a/fs/ocfs2/extent_map.h > +++ b/fs/ocfs2/extent_map.h > @@ -53,6 +53,8 @@ int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno, > int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, > u64 map_start, u64 map_len); > > +int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int origin); > + > int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster, > u32 *p_cluster, u32 *num_clusters, > struct ocfs2_extent_list *el, > diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c > index cce8c2b..5349a4b 100644 > --- a/fs/ocfs2/file.c > +++ b/fs/ocfs2/file.c > @@ -2572,6 +2572,55 @@ bail: > return ret; > } > > +/* Refer generic_file_llseek_unlocked() */ > +static loff_t ocfs2_file_llseek(struct file *file, loff_t offset, int origin) > +{ > + struct inode *inode = file->f_mapping->host; > + int ret = 0; > + > + mutex_lock(&inode->i_mutex); > + > + switch (origin) { > + case SEEK_END: > + offset += inode->i_size; > + break; > + case SEEK_CUR: > + if (offset == 0) { > + offset = file->f_pos; > + goto out; > + } > + offset += file->f_pos; > + break; > + case SEEK_DATA: > + case SEEK_HOLE: > + ret = ocfs2_seek_data_hole_offset(file, &offset, origin); > + if (ret) > + goto out; > + break; > + default: > + ret = -EINVAL; > + goto out; > + } > + > + if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) > + ret = -EINVAL; > + if (!ret && offset > inode->i_sb->s_maxbytes) > + ret = -EINVAL; > + if (ret) > + goto out; > + > + if (offset != file->f_pos) { > + file->f_pos = offset; > + file->f_version = 0; > + } > + > +out: > + mutex_unlock(&inode->i_mutex); > + if (ret) > + return ret; > + return offset; > +} > + > const struct inode_operations ocfs2_file_iops = { > .setattr = ocfs2_setattr, > .getattr = ocfs2_getattr, > @@ -2594,7 +2643,7 @@ const struct inode_operations ocfs2_special_file_iops = { > * ocfs2_fops_no_plocks and ocfs2_dops_no_plocks! > */ > const struct file_operations ocfs2_fops = { > - .llseek = generic_file_llseek, > + .llseek = ocfs2_file_llseek, > .read = do_sync_read, > .write = do_sync_write, > .mmap = ocfs2_mmap, > @@ -2642,7 +2691,7 @@ const struct file_operations ocfs2_dops = { > * the cluster. > */ > const struct file_operations ocfs2_fops_no_plocks = { > - .llseek = generic_file_llseek, > + .llseek = ocfs2_file_llseek, > .read = do_sync_read, > .write = do_sync_write, > .mmap = ocfs2_mmap,