优秀的编程知识分享平台

网站首页 > 技术文章 正文

LINUX VFS分析之do_sys_open接口的文件打开接口do_last分析

nanyue 2024-10-15 11:39:59 技术文章 4 ℃

在上面几篇文章中已经介绍了文件描述符的创建、fd的创建、路径查找的内容,接下来介绍do_sys_open的最后一个操作,即文件打开接口do_last的分析。本小节主要涉及do_last、finish_open、may_open、lookup_open这几个接口的介绍。

do_last接口主要用于打开文件,包括文件创建等操作。

do_last接口实现如下功能:

1.若为目录(链接目录、根目录、当前目录、当前目录的父目录),则返回失败

2.若为链接文件,返回1,由上层函数对链接文件进行查找操作

3.若文件不存在且拥有创建文件权限,则创建文件对应的inode节点,并打开文件

4.若文件不存在且不拥有创建文件权限,则返回失败

5.若文件存在,则打开文件(若对该文件没有相应的权限,则返回失败)

这个接口函数涉及的函数调用比较多,不过其主要实现的功能即为上面说明的几点。

/*
 * do_last() - handle the final path component of an open.
 *
 * @nd:     path-walk state; nd->path is the directory containing the last
 *          component on entry.
 * @path:   scratch path filled in by the lookup helpers.
 * @file:   the struct file being set up.
 * @op:     open flags, access mode, creation mode and lookup intent.
 * @opened: in/out bit set; FILE_CREATED / FILE_OPENED are tested and updated.
 * @name:   filename, passed through to the audit hooks.
 *
 * Returns 1 when should_follow_link() reports that the last component has to
 * be followed (the walk is left in place for the caller), otherwise the value
 * of 'error' at exit: 0 on success or a negative errno.
 */
static int do_last(struct nameidata *nd, struct path *path,
		   struct file *file, const struct open_flags *op,
		   int *opened, struct filename *name)
{
	struct dentry *dir = nd->path.dentry;	/* dentry of the directory that holds the last component */
	int open_flag = op->open_flag;		/* local, modifiable copy of the open flags */
	bool will_truncate = (open_flag & O_TRUNC) != 0;	/* call handle_truncate() after opening? */
	bool got_write = false;			/* true while we hold mnt_want_write() on nd->path.mnt */
	int acc_mode = op->acc_mode;
	struct inode *inode;
	bool symlink_ok = false;	/* set for O_PATH without LOOKUP_FOLLOW; passed negated to should_follow_link() */
	struct path save_parent = { .dentry = NULL, .mnt = NULL };	/* parent kept for the -EOPENSTALE retry */
	bool retried = false;		/* the stale-open retry is attempted at most once */
	int error;

	/* Clear LOOKUP_PARENT and add the intent flags of this open. */
	nd->flags &= ~LOOKUP_PARENT;
	nd->flags |= op->intent;

	/*
	 * Last components that are not a normal name are handled here:
	 *  - LAST_DOT / LAST_DOTDOT: handle_dots() first, then fall through to
	 *    the LAST_ROOT handling.
	 *  - LAST_ROOT: complete_walk(); with O_CREAT fail with -EISDIR,
	 *    otherwise continue at finish_open.
	 *  - LAST_BIND: complete_walk(), then continue at finish_open.
	 * Any other last_type falls out of the switch to the lookup below.
	 */
	switch (nd->last_type) {
	case LAST_DOTDOT:
	case LAST_DOT:
		error = handle_dots(nd, nd->last_type);
		if (error)
			return error;
		/* fallthrough */
	case LAST_ROOT:
		error = complete_walk(nd);
		if (error)
			return error;
		/* audit hook for the resolved dentry */
		audit_inode(name, nd->path.dentry, 0);
		if (open_flag & O_CREAT) {
			error = -EISDIR;
			goto out;
		}
		goto finish_open;
	case LAST_BIND:
		error = complete_walk(nd);
		if (error)
			return error;
		audit_inode(name, dir, 0);
		goto finish_open;
	}

	/*
	 * Without O_CREAT, try lookup_fast() first: 0 jumps straight to
	 * finish_lookup, a negative value is a failure, and a positive value
	 * falls through to the locked lookup at retry_lookup.
	 */
	if (!(open_flag & O_CREAT)) {
		/* a character follows the last component (cf. "trailing slashes?" below) */
		if (nd->last.name[nd->last.len])
			nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
		if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW))
			symlink_ok = true;
		/* we _can_ be in RCU mode here */
		error = lookup_fast(nd, path, &inode);
		if (likely(!error))
			goto finish_lookup;

		if (error < 0)
			goto out;

		BUG_ON(nd->inode != dir->d_inode);
	} else {
		/* create side of things */
		/*
		 * This will *only* deal with leaving RCU mode - LOOKUP_JUMPED
		 * has been cleared when we got to the last component we are
		 * about to look up
		 */
		error = complete_walk(nd);
		if (error)
			return error;

		audit_inode(name, dir, LOOKUP_PARENT);
		error = -EISDIR;
		/* trailing slashes? */
		if (nd->last.name[nd->last.len])
			goto out;
	}

retry_lookup:
	/*
	 * For opens that create, truncate or write, try to take write access
	 * on the mount; got_write records whether that succeeded.
	 */
	if (op->open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) {
		error = mnt_want_write(nd->path.mnt);
		if (!error)
			got_write = true;
		/*
		 * do _not_ fail yet - we might not need that or fail with
		 * a different error; let lookup_open() decide; we'll be
		 * dropping this one anyway.
		 */
	}
	/* lookup_open() is called with the parent directory's i_mutex held */
	mutex_lock(&dir->d_inode->i_mutex);
	error = lookup_open(nd, path, file, op, got_write, opened);
	mutex_unlock(&dir->d_inode->i_mutex);

	/*
	 * error < 0: failure.  error == 0: skip finish_open() and continue at
	 * the 'opened' label (NOTE(review): presumably the file was already
	 * opened by the atomic_open path - confirm in atomic_open()).
	 */
	if (error <= 0) {
		if (error)
			goto out;

		/* never truncate a freshly created or non-regular file */
		if ((*opened & FILE_CREATED) ||
		    !S_ISREG(file_inode(file)->i_mode))
			will_truncate = false;

		audit_inode(name, file->f_path.dentry, 0);
		goto opened;
	}

	/*
	 * Newly created file: drop O_TRUNC, reduce the access mode to
	 * MAY_OPEN, publish 'path' into nd and continue at
	 * finish_open_created.
	 */
	if (*opened & FILE_CREATED) {
		/* Don't check for write permission, don't truncate */
		open_flag &= ~O_TRUNC;
		will_truncate = false;
		acc_mode = MAY_OPEN;
		path_to_nameidata(path, nd);
		goto finish_open_created;
	}

	/*
	 * create/update audit record if it already exists.
	 */
	if (path->dentry->d_inode)
		audit_inode(name, path->dentry, 0);

	/*
	 * If atomic_open() acquired write access it is dropped now due to
	 * possible mount and symlink following (this might be optimized away if
	 * necessary...)
	 */
	if (got_write) {
		mnt_drop_write(nd->path.mnt);
		got_write = false;
	}

	/* the entry was not created by us: O_CREAT|O_EXCL must fail */
	error = -EEXIST;
	if ((open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT))
		goto exit_dput;

	error = follow_managed(path, nd->flags);
	if (error < 0)
		goto exit_dput;

	/* a positive return from follow_managed() is recorded as LOOKUP_JUMPED */
	if (error)
		nd->flags |= LOOKUP_JUMPED;

	BUG_ON(nd->flags & LOOKUP_RCU);
	inode = path->dentry->d_inode;
finish_lookup:
	/* we _can_ be in RCU mode here */
	/* no inode behind the dentry: publish 'path' into nd and fail with -ENOENT */
	error = -ENOENT;
	if (!inode) {
		path_to_nameidata(path, nd);
		goto out;
	}

	/*
	 * The last component has to be followed: leave RCU mode if necessary
	 * (-ECHILD if that fails) and return 1.
	 */
	if (should_follow_link(inode, !symlink_ok)) {
		if (nd->flags & LOOKUP_RCU) {
			if (unlikely(unlazy_walk(nd, path->dentry))) {
				error = -ECHILD;
				goto out;
			}
		}
		BUG_ON(inode != path->dentry->d_inode);
		return 1;
	}

	/*
	 * Step nd onto the found entry.  In RCU mode or when the mount
	 * changed, path_to_nameidata() does it; otherwise the parent is
	 * remembered in save_parent (with its own mntget() reference) so that
	 * stale_open can retry from it.
	 */
	if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path->mnt) {
		path_to_nameidata(path, nd);
	} else {
		save_parent.dentry = nd->path.dentry;
		save_parent.mnt = mntget(path->mnt);
		nd->path.dentry = path->dentry;

	}
	nd->inode = inode;
	/* Why this, you ask?  _Now_ we might have grown LOOKUP_JUMPED... */
	error = complete_walk(nd);
	if (error) {
		path_put(&save_parent);
		return error;
	}
	/* O_CREAT on an existing directory fails with -EISDIR */
	error = -EISDIR;
	if ((open_flag & O_CREAT) && S_ISDIR(nd->inode->i_mode))
		goto out;
	/* LOOKUP_DIRECTORY set but can_lookup() rejects the inode: -ENOTDIR */
	error = -ENOTDIR;
	if ((nd->flags & LOOKUP_DIRECTORY) && !can_lookup(nd->inode))
		goto out;
	/* audit hook */
	audit_inode(name, nd->path.dentry, 0);
finish_open:
	/* only regular files are ever truncated */
	if (!S_ISREG(nd->inode->i_mode))
		will_truncate = false;

	/* truncation requires write access to the mount; fail if it is refused */
	if (will_truncate) {
		error = mnt_want_write(nd->path.mnt);
		if (error)
			goto out;
		got_write = true;
	}
finish_open_created:
	/* permission / file-type checks for this open */
	error = may_open(&nd->path, acc_mode, open_flag);
	if (error)
		goto out;
	/* set the mount of the struct file, then let finish_open() do the open */
	file->f_path.mnt = nd->path.mnt;
	error = finish_open(file, nd->path.dentry, NULL, opened);
	if (error) {
		/* -EOPENSTALE gets one retry from the saved parent */
		if (error == -EOPENSTALE)
			goto stale_open;
		goto out;
	}
opened:
	/* post-open checks; any failure releases the file via exit_fput */
	error = open_check_o_direct(file);
	if (error)
		goto exit_fput;
	error = ima_file_check(file, op->acc_mode);
	if (error)
		goto exit_fput;

	/* perform the truncation requested by O_TRUNC */
	if (will_truncate) {
		error = handle_truncate(file);
		if (error)
			goto exit_fput;
	}
out:
	/* common exit: drop mount write access if held, release save_parent, end the walk */
	if (got_write)
		mnt_drop_write(nd->path.mnt);
	path_put(&save_parent);
	terminate_walk(nd);
	return error;

exit_dput:
	path_put_conditional(path, nd);
	goto out;
exit_fput:
	fput(file);
	goto out;

stale_open:
	/* If no saved parent or already retried then can't retry */
	if (!save_parent.dentry || retried)
		goto out;

	/* rewind nd to the saved parent directory and redo the locked lookup */
	BUG_ON(save_parent.dentry != dir);
	path_put(&nd->path);
	nd->path = save_parent;
	nd->inode = dir->d_inode;
	save_parent.mnt = NULL;
	save_parent.dentry = NULL;
	if (got_write) {
		mnt_drop_write(nd->path.mnt);
		got_write = false;
	}
	retried = true;
	goto retry_lookup;
}

下面针对上面调用的lookup_open、may_open、finish_open这几个接口进行分析

lookup_open接口分析

该接口包括文件的查找以及文件创建等功能

该接口主要用于查找指定名称的文件,主要包括如下几种可能

1. 从dentry的dcache中查找文件,若查找成功,则更新path变量,返回成功

2.若从dentry的dcache中没有查找到文件,则调用父目录inode的lookup接口,查找该目录下是否存在符合条件的子dentry

若查找成功,更新path变量,返回成功;

若查找失败,则判断是否有创建文件的权限:

若有创建文件的权限,则调用vfs_create接口创建文件,更新path变量,返回成功;

若没有创建文件的权限,则更新path变量后,返回成功

/*
 * lookup_open() - look up the last component in nd->path and create it if
 * O_CREAT asks for that and it does not exist.
 *
 * @nd:        path-walk state; nd->path is the parent directory, nd->last
 *             the name to look up.
 * @path:      filled with the resulting dentry and nd->path.mnt when 1 is
 *             returned.
 * @file:      struct file, only forwarded to atomic_open().
 * @op:        open flags and creation mode.
 * @got_write: whether the caller holds write access on the mount.
 * @opened:    FILE_CREATED is cleared on entry and set before a create is
 *             attempted.
 *
 * Returns 1 with *path filled in, a negative errno on failure, or whatever
 * atomic_open() returns when that path is taken.
 */
static int lookup_open(struct nameidata *nd, struct path *path,
			struct file *file,
			const struct open_flags *op,
			bool got_write, int *opened)
{
	struct dentry *dir = nd->path.dentry;
	struct inode *dir_inode = dir->d_inode;
	struct dentry *dentry;
	int error;
	bool need_lookup;

	/*
	 * Look the name up in the dcache first; lookup_dcache() reports through
	 * need_lookup whether a real lookup is still required.
	 */
	*opened &= ~FILE_CREATED;
	dentry = lookup_dcache(&nd->last, dir, nd->flags, &need_lookup);
	if (IS_ERR(dentry))
		return PTR_ERR(dentry);

	/* Cached positive dentry: will open in f_op->open */
	if (!need_lookup && dentry->d_inode)
		goto out_no_open;

	/* LOOKUP_OPEN and the directory implements ->atomic_open: hand over to it */
	if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open) {
		return atomic_open(nd, dentry, path, file, op, got_write,
				   need_lookup, opened);
	}

	/* not resolved from the dcache: do the real lookup on the parent inode */
	if (need_lookup) {
		BUG_ON(dentry->d_inode);

		dentry = lookup_real(dir_inode, dentry, nd->flags);
		if (IS_ERR(dentry))
			return PTR_ERR(dentry);
	}

	/* Negative dentry, just create the file */
	if (!dentry->d_inode && (op->open_flag & O_CREAT)) {
		umode_t mode = op->mode;
		/* apply the umask unless IS_POSIXACL() is set for the directory */
		if (!IS_POSIXACL(dir->d_inode))
			mode &= ~current_umask();
		/*
		 * This write is needed to ensure that a
		 * rw->ro transition does not occur between
		 * the time when the file is created and when
		 * a permanent write count is taken through
		 * the 'struct file' in finish_open().
		 */
		if (!got_write) {
			error = -EROFS;
			goto out_dput;
		}
		/* note: FILE_CREATED is set before the security hook and vfs_create() run */
		*opened |= FILE_CREATED;
		error = security_path_mknod(&nd->path, dentry, mode, 0);
		if (error)
			goto out_dput;
		error = vfs_create(dir->d_inode, dentry, mode,
				   nd->flags & LOOKUP_EXCL);
		if (error)
			goto out_dput;
	}
out_no_open:
	/* hand the dentry (positive or negative) back to the caller */
	path->dentry = dentry;
	path->mnt = nd->path.mnt;
	return 1;

out_dput:
	/* failure: release the dentry reference and return the error */
	dput(dentry);
	return error;
}

其中针对vfs_create接口,其定义如下,主要调用dentry的create接口进行inode节点的创建,这就涉及到具体文件系统的create接口调用

/*
 * vfs_create() - create a regular file through the directory's ->create().
 *
 * @dir:       inode of the parent directory.
 * @dentry:    dentry for the new name.
 * @mode:      requested mode; reduced to the S_IALLUGO bits and tagged
 *             S_IFREG before it is used.
 * @want_excl: forwarded unchanged to ->create().
 *
 * Returns 0 on success.  Fails with the error of may_create(),
 * security_inode_create() or ->create(), or with -EACCES when the directory
 * has no ->create() operation.
 */
int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
		bool want_excl)
{
	int rc;

	rc = may_create(dir, dentry);
	if (rc)
		return rc;

	if (!dir->i_op->create)
		return -EACCES;	/* shouldn't it be ENOSYS? */

	/* keep only the permission bits and force the regular-file type */
	mode = (mode & S_IALLUGO) | S_IFREG;

	rc = security_inode_create(dir, dentry, mode);
	if (rc)
		return rc;

	rc = dir->i_op->create(dir, dentry, mode, want_excl);
	if (rc)
		return rc;

	/* notify only after a successful create */
	fsnotify_create(dir, dentry);
	return 0;
}

may_open接口分析

该接口主要用于判断是否可以打开一个文件,主要涉及以下几个方面的判断:

1.若文件的flag设置了O_PATH标签,则返回可以;

2.若为链接文件,则返回失败;

3.若为字符或块设备,且该文件对应的文件系统不允许访问设备文件,则返回失败

4.若打开文件的标签与超级块或者inode的权限不匹配,返回失败

5.针对inode标记为S_APPEND或者打开文件标签为O_NOATIME时,做相应的合法性判断

/*
 * may_open() - decide whether the object at @path may be opened.
 *
 * @path:     mount + dentry of the object.
 * @acc_mode: requested MAY_* access bits; 0 skips every check.
 * @flag:     open flags (O_TRUNC, O_APPEND, O_NOATIME, access mode).
 *
 * Returns 0 when the open is allowed, otherwise a negative errno:
 * -ENOENT (no inode), -ELOOP (symlink), -EISDIR (directory with MAY_WRITE),
 * -EACCES (device node on a MNT_NODEV mount), the inode_permission() error,
 * or -EPERM (append-only / O_NOATIME restrictions).
 */
static int may_open(struct path *path, int acc_mode, int flag)
{
	struct inode *inode = path->dentry->d_inode;
	int fmt;
	int rc;

	/*
	 * Nothing to check when no access mode is requested.
	 * NOTE(review): presumably this is the O_PATH case - confirm against
	 * the code that fills in op->acc_mode.
	 */
	if (!acc_mode)
		return 0;

	if (!inode)
		return -ENOENT;

	/* file-type specific rules */
	fmt = inode->i_mode & S_IFMT;
	if (fmt == S_IFLNK)
		return -ELOOP;

	if (fmt == S_IFDIR) {
		/* directories are refused only when write access is requested */
		if (acc_mode & MAY_WRITE)
			return -EISDIR;
	} else if (fmt == S_IFBLK || fmt == S_IFCHR) {
		/* device nodes are unusable on a MNT_NODEV mount */
		if (path->mnt->mnt_flags & MNT_NODEV)
			return -EACCES;
		flag &= ~O_TRUNC;
	} else if (fmt == S_IFIFO || fmt == S_IFSOCK) {
		/* like device nodes, FIFOs and sockets ignore O_TRUNC from here on */
		flag &= ~O_TRUNC;
	}

	/* generic permission check for the requested access bits */
	rc = inode_permission(inode, acc_mode);
	if (rc)
		return rc;

	/*
	 * An append-only inode may be opened for writing only with O_APPEND,
	 * and never with O_TRUNC.
	 */
	if (IS_APPEND(inode)) {
		if (((flag & O_ACCMODE) != O_RDONLY && !(flag & O_APPEND)) ||
		    (flag & O_TRUNC))
			return -EPERM;
	}

	/* O_NOATIME can only be set by the owner or superuser */
	if ((flag & O_NOATIME) && !inode_owner_or_capable(inode))
		return -EPERM;

	return 0;
}

finish_open接口分析

该接口主要调用do_dentry_open接口,实现文件的打开操作

/*
 * finish_open() - attach @dentry to @file and perform the open.
 *
 * @file:   struct file being opened; its f_path.dentry is set here.
 * @dentry: dentry to open.
 * @open:   optional open callback forwarded to do_dentry_open(); may be NULL.
 * @opened: FILE_OPENED must not be set on entry and is set on success.
 *
 * Returns the result of do_dentry_open(): 0 on success, otherwise its error.
 */
int finish_open(struct file *file, struct dentry *dentry,
		int (*open)(struct inode *, struct file *),
		int *opened)
{
	int rc;

	/* once it's opened, it's opened */
	BUG_ON(*opened & FILE_OPENED);

	file->f_path.dentry = dentry;

	/* the open itself is done with the current task's credentials */
	rc = do_dentry_open(file, open, current_cred());
	if (rc)
		return rc;

	*opened |= FILE_OPENED;
	return 0;
}

do_dentry_open接口的定义如下,该接口通过调用inode->i_fop获取文件操作的接口函数指针。

/*
 * do_dentry_open() - finish setting up @f for the dentry already stored in
 * f->f_path and call the open method.
 *
 * @f:    struct file with f_path and f_flags filled in by the caller.
 * @open: open callback to use; when NULL, f->f_op->open is used if present.
 * @cred: credentials passed to security_file_open().
 *
 * Returns 0 on success.  On failure the references and write access taken
 * here are released, f->f_path and f->f_inode are reset to NULL and the
 * error is returned.
 */
static int do_dentry_open(struct file *f,
			  int (*open)(struct inode *, struct file *),
			  const struct cred *cred)
{
	static const struct file_operations empty_fops = {};
	struct inode *inode;
	int error;

	/* derive the file mode from the open flags and allow seek/pread/pwrite */
	f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK |
				FMODE_PREAD | FMODE_PWRITE;

	/* O_PATH replaces the whole mode with FMODE_PATH */
	if (unlikely(f->f_flags & O_PATH))
		f->f_mode = FMODE_PATH;

	/* take references on f->f_path; released by path_put() in cleanup_file */
	path_get(&f->f_path);
	/* bind the file to the inode behind its dentry */
	inode = f->f_inode = f->f_path.dentry->d_inode;
	/*
	 * Files opened for writing take write access through
	 * __get_file_write_access(); for non-special files file_take_write()
	 * is called as well.  cleanup_all undoes both.
	 */
	if (f->f_mode & FMODE_WRITE) {
		error = __get_file_write_access(inode, f->f_path.mnt);
		if (error)
			goto cleanup_file;
		if (!special_file(inode->i_mode))
			file_take_write(f);
	}

	f->f_mapping = inode->i_mapping;
	file_sb_list_add(f, inode->i_sb);

	/* FMODE_PATH files get empty file operations and are done here */
	if (unlikely(f->f_mode & FMODE_PATH)) {
		f->f_op = &empty_fops;
		return 0;
	}

	/* use the inode's i_fop as this file's operations */
	f->f_op = fops_get(inode->i_fop);

	error = security_file_open(f, cred);
	if (error)
		goto cleanup_all;

	error = break_lease(inode, f->f_flags);
	if (error)
		goto cleanup_all;

	/* no explicit callback given: fall back to the file's own ->open */
	if (!open && f->f_op)
		open = f->f_op->open;
	if (open) {
		error = open(inode, f);
		if (error)
			goto cleanup_all;
	}
	/* read-only opens bump the inode's read count */
	if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
		i_readcount_inc(inode);

	/* these flags only matter while opening; clear them from f_flags */
	f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);

	file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);

	return 0;

cleanup_all:
	/* undo everything done after path_get() */
	fops_put(f->f_op);
	file_sb_list_del(f);
	if (f->f_mode & FMODE_WRITE) {
		put_write_access(inode);
		if (!special_file(inode->i_mode)) {
			/*
			 * We don't consider this a real
			 * mnt_want/drop_write() pair
			 * because it all happened right
			 * here, so just reset the state.
			 */
			file_reset_write(f);
			__mnt_drop_write(f->f_path.mnt);
		}
	}
cleanup_file:
	/* drop the path references and detach the file from path and inode */
	path_put(&f->f_path);
	f->f_path.mnt = NULL;
	f->f_path.dentry = NULL;
	f->f_inode = NULL;
	return error;
}

以上即为do_sys_open接口的分析,主要涉及路径查找、链接目录、链接文件的查找、文件的打开等操作,简单来说就是

实现了文件系统相关结构体(dentry、inode、task、fs_struct、file等)之间的关联。针对内核模块的分析,建议

首先进行结构体的分析,当结构体之间的关联图理清后,基本上就对内核模块的实现有了大致的了解。

Tags:

最近发表
标签列表