qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

RE: [PATCH v4 04/16] hw/9pfs: Implement Windows specific xxxdir() APIs


From: Shi, Guohuai
Subject: RE: [PATCH v4 04/16] hw/9pfs: Implement Windows specific xxxdir() APIs
Date: Fri, 3 Feb 2023 16:30:35 +0000


> -----Original Message-----
> From: Christian Schoenebeck <qemu_oss@crudebyte.com>
> Sent: Friday, February 3, 2023 22:41
> To: Greg Kurz <groug@kaod.org>; qemu-devel@nongnu.org
> Cc: Meng, Bin <Bin.Meng@windriver.com>; Marc-André Lureau
> <marcandre.lureau@redhat.com>; Daniel P. Berrangé <berrange@redhat.com>; Shi,
> Guohuai <Guohuai.Shi@windriver.com>
> Subject: Re: [PATCH v4 04/16] hw/9pfs: Implement Windows specific xxxdir()
> APIs
> 
> CAUTION: This email comes from a non Wind River email account!
> Do not click links or open attachments unless you recognize the sender and
> know the content is safe.
> 
> On Friday, February 3, 2023 2:34:13 PM CET Shi, Guohuai wrote:
> >
> > > -----Original Message-----
> > > From: Christian Schoenebeck <qemu_oss@crudebyte.com>
> > > Sent: Friday, February 3, 2023 20:25
> > > To: Greg Kurz <groug@kaod.org>; qemu-devel@nongnu.org
> > > Cc: Shi, Guohuai <Guohuai.Shi@windriver.com>; Meng, Bin
> > > <Bin.Meng@windriver.com>; Marc-André Lureau
> > > <marcandre.lureau@redhat.com>; Daniel P. Berrangé
> > > <berrange@redhat.com>
> > > Subject: Re: [PATCH v4 04/16] hw/9pfs: Implement Windows specific
> > > xxxdir() APIs
> > >
> > > CAUTION: This email comes from a non Wind River email account!
> > > Do not click links or open attachments unless you recognize the
> > > sender and know the content is safe.
> > >
> > > On Monday, January 30, 2023 10:51:50 AM CET Bin Meng wrote:
> > > > From: Guohuai Shi <guohuai.shi@windriver.com>
> > > >
> > > > This commit implements Windows specific xxxdir() APIs for safety
> > > > directory access.
> > > >
> > >
> > > This issue deserves a link to either the previous discussion
> > >
> > > Link: https://lore.kernel.org/qemu-devel/2830993.GtbaR8S6b6@silver/
> > >
> > > and/or a link to this continuation of the discussion here, as it's
> > > not a trivial issue, with pros and cons been discussed for the
> > > individual, possible solutions.
> > >
> > > > Signed-off-by: Guohuai Shi <guohuai.shi@windriver.com>
> > > > Signed-off-by: Bin Meng <bin.meng@windriver.com>
> > > > ---
> > > >
> > > >  hw/9pfs/9p-util.h       |   6 +
> > > >  hw/9pfs/9p-util-win32.c | 296
> > > > ++++++++++++++++++++++++++++++++++++++++
> > > >  2 files changed, 302 insertions(+)
> > > >
> > > > diff --git a/hw/9pfs/9p-util.h b/hw/9pfs/9p-util.h index
> > > > 0f159fb4ce..c1c251fbd1 100644
> > > > --- a/hw/9pfs/9p-util.h
> > > > +++ b/hw/9pfs/9p-util.h
> > > > @@ -141,6 +141,12 @@ int unlinkat_win32(int dirfd, const char
> > > > *pathname, int flags);  int statfs_win32(const char *root_path,
> > > > struct statfs *stbuf);  int openat_dir(int dirfd, const char
> > > > *name);  int openat_file(int dirfd, const char *name, int flags,
> > > > mode_t mode);
> > > > +DIR *opendir_win32(const char *full_file_name); int
> > > > +closedir_win32(DIR *pDir); struct dirent *readdir_win32(DIR
> > > > +*pDir); void rewinddir_win32(DIR *pDir); void seekdir_win32(DIR
> > > > +*pDir, long pos); long telldir_win32(DIR *pDir);
> > > >  #endif
> > > >
> > > >  static inline void close_preserve_errno(int fd) diff --git
> > > > a/hw/9pfs/9p-util-win32.c b/hw/9pfs/9p-util-win32.c index
> > > > a99d579a06..5503199300 100644
> > > > --- a/hw/9pfs/9p-util-win32.c
> > > > +++ b/hw/9pfs/9p-util-win32.c
> > > > @@ -37,6 +37,13 @@
> > > >   *    Windows does not support opendir, the directory fd is created by
> > > >   *    CreateFile and convert to fd by _open_osfhandle(). Keep the fd
> open
> > > will
> > > >   *    lock and protect the directory (can not be modified or replaced)
> > > > + *
> > > > + * 5. Windows and MinGW does not provide safety directory
> > > > + accessing
> > > functions.
> > > > + *    readdir(), seekdir() and telldir() may get or set wrong value
> > > because
> > > > + *    directory entry data is not protected.
> > >
> > > I would rephrase that sentence, as it doesn't cover the root problem
> > > adequately. Maybe something like this:
> > >
> > > 5. Neither Windows native APIs, nor MinGW provide a POSIX compatible
> > > API for acquiring directory entries in a safe way. Calling those
> > > APIs (native
> > > _findfirst() and _findnext() or MinGW's readdir(), seekdir() and
> > > telldir()) directly can lead to an inconsistent state if directory
> > > is modified in between, e.g. the same directory appearing more than
> > > once in output, or directories not appearing at all in output even
> > > though they were neither newly created nor deleted. POSIX does not
> > > define what happens with deleted or newly created directories in between,
> but it guarantees a consistent state.
> > >
> > > > + *
> > > > + *    This file re-write POSIX directory accessing functions and cache
> all
> > > > + *    directory entries during opening.
> > > >   */
> > > >
> > > >  #include "qemu/osdep.h"
> > > > @@ -51,6 +58,27 @@
> > > >
> > > >  #define V9FS_MAGIC  0x53465039  /* string "9PFS" */
> > > >
> > > > +/*
> > > > + * MinGW and Windows does not provide safety way to seek
> > > > +directory while other
> > > > + * thread is modifying same directory.
> > > > + *
> > > > + * The two structures are used to cache all directory entries
> > > > +when opening
> > > it.
> > > > + * Cached entries are always returned for read or seek.
> > > > + */
> > > > +struct dir_win32_entry {
> > > > +    QSLIST_ENTRY(dir_win32_entry) node;
> > > > +    struct _finddata_t dd_data;
> > > > +};
> > > > +
> > > > +struct dir_win32 {
> > > > +    struct dirent dd_dir;
> > > > +    uint32_t offset;
> > > > +    uint32_t total_entries;
> > > > +    QSLIST_HEAD(, dir_win32_entry) head;
> > > > +    struct dir_win32_entry *current;
> > > > +    char dd_name[1];
> > > > +};
> > > > +
> > > >  /*
> > > >   * win32_error_to_posix - convert Win32 error to POSIX error number
> > > >   *
> > > > @@ -977,3 +1005,271 @@ int qemu_mknodat(int dirfd, const char
> > > > *filename,
> > > mode_t mode, dev_t dev)
> > > >      errno = ENOTSUP;
> > > >      return -1;
> > > >  }
> > > > +
> > > > +/*
> > > > + * opendir_win32 - open a directory
> > > > + *
> > > > + * This function opens a directory and caches all directory entries.
> > > > + */
> > > > +DIR *opendir_win32(const char *full_file_name) {
> > > > +    HANDLE hDir = INVALID_HANDLE_VALUE;
> > > > +    DWORD attribute;
> > > > +    intptr_t dd_handle = -1;
> > > > +    struct _finddata_t dd_data;
> > > > +
> > > > +    struct dir_win32 *stream = NULL;
> > > > +    struct dir_win32_entry *dir_entry;
> > > > +    struct dir_win32_entry *prev;
> > > > +    struct dir_win32_entry *next;
> > > > +
> > > > +    int err = 0;
> > > > +    int find_status;
> > > > +    uint32_t index;
> > > > +
> > > > +    /* open directory to prevent it being removed */
> > > > +
> > > > +    hDir = CreateFile(full_file_name, GENERIC_READ,
> > > > +                      FILE_SHARE_READ | FILE_SHARE_WRITE |
> > > FILE_SHARE_DELETE,
> > > > +                      NULL,
> > > > +                      OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS,
> > > > + NULL);
> > > > +
> > > > +    if (hDir == INVALID_HANDLE_VALUE) {
> > > > +        err = win32_error_to_posix(GetLastError());
> > > > +        goto out;
> > > > +    }
> > > > +
> > > > +    attribute = GetFileAttributes(full_file_name);
> > > > +
> > > > +    /* symlink is not allow */
> > > > +    if (attribute == INVALID_FILE_ATTRIBUTES
> > > > +        || (attribute & FILE_ATTRIBUTE_REPARSE_POINT) != 0) {
> > > > +        err = EACCES;
> > > > +        goto out;
> > > > +    }
> > > > +
> > > > +    /* check if it is a directory */
> > > > +    if ((attribute & FILE_ATTRIBUTE_DIRECTORY) == 0) {
> > > > +        err = ENOTDIR;
> > > > +        goto out;
> > > > +    }
> > > > +
> > > > +    /*
> > > > +     * findfirst() need suffix format name like "\dir1\dir2\*",
> > > > + allocate
> > > more
> > > > +     * buffer to store suffix.
> > > > +     */
> > > > +    stream = g_malloc0(sizeof(struct dir_win32) +
> > > > + strlen(full_file_name) +
> > > 3);
> > > > +    QSLIST_INIT(&stream->head);
> > > > +
> > > > +    strcpy(stream->dd_name, full_file_name);
> > > > +    strcat(stream->dd_name, "\\*");
> > > > +
> > > > +    dd_handle = _findfirst(stream->dd_name, &dd_data);
> > > > +
> > > > +    if (dd_handle == -1) {
> > > > +        err = errno;
> > > > +        goto out;
> > > > +    }
> > > > +
> > > > +    index = 0;
> > > > +
> > > > +    /* read all entries to link list */
> > > > +    do {
> > > > +        dir_entry = g_malloc0(sizeof(struct dir_win32_entry));
> > > > +        memcpy(&dir_entry->dd_data, &dd_data, sizeof(dd_data));
> > > > +        if (index == 0) {
> > > > +            QSLIST_INSERT_HEAD(&stream->head, dir_entry, node);
> > > > +        } else {
> > > > +            QSLIST_INSERT_AFTER(prev, dir_entry, node);
> > > > +        }
> > > > +
> > > > +        prev = dir_entry;
> > > > +        find_status = _findnext(dd_handle, &dd_data);
> > > > +
> > > > +        index++;
> > > > +    } while (find_status == 0);
> > >
> > > So you decided to go for the solution that caches all entries of a
> > > directory in RAM.
> > >
> > > So don't you think my last suggested solution that would call native
> > > _findfirst() and _findnext() directly, but without any chaching and
> > > instead picking the relevent entry simply by inode number, might be
> > > a better candidate as a starting point for landing Windows support?
> > > Link to that previous
> > > suggestion:
> > >
> > > https://lore.kernel.org/qemu-devel/2468168.SvRIHAoRfs@silver/
> > >
> >
> > I did a quick test for caching data without name entry, but it failed for
> reading + deleting directory on Windows host (like "rm -rf" for a directory).
> > The root cause is: Windows's directory entry is not cached.
> > If there is 100 files in a directory:
> >
> > File1
> > File2
> > ...
> > File100
> >
> > When "rm -rf" is working:
> >
> > It read first 10 entries, and remove them. 9pfs may seek and re-seek to
> offset 10 to read next 10 entries.
> > But Windows and MinGW does not provide rewinddir.
> > If we using findfirst() and findnext to seek to offset 10, then we will not
> get File11 but get File 21 (because we skipped 10 entries by seekdir()).
> 
> I assume you are referring to a simple solution like MinGW does, i.e. a
> consecutive dense index (0,1,2,3,...n-1 where n is the current total amount
> of directory entries). That would not work, yes. But that's not what I
> suggested.
> 
> With an inode number based lookup you would not seek to an incorrect entry
> ...
> 
> > If we removed some entries in directory, inode number is useless because we
> can not find it again.
> 
> You *can* recover from the previous inode number, even if any directory entry
> has been deleted in the meantime: you would lookup the entry with the next
> higher inode number.
> 
> Example, say initial directory state on host is:
> 
> name   inode-nr
> aaa    8
> bbb    3
> ccc    4
> ddd    2
> eee    9
> 
> Say client is looking up exactly 2 entries, you would return to client in
> this order (by inode-nr):
> 
> 1. ddd
> 2. bbb
> 
> Now say "bbb" (a.k.a. previous) and "ccc" (a.k.a next) are removed. Directory
> state on host is now:
> 
> name   inode-nr
> aaa    8
> ddd    2
> eee    9
> 
> Subsequently the last directory entries are requested by client. Previous
> inode number (stored in RAM) was 3, which no longer exists, so you lookup the
> entry with the next higher inode number than 3, which is now 8 in this
> example. Hence you would eventually return to client (in this order):
> 
> 3. aaa
> 4. eee
> 

Yes, it can work by using inode number (called File ID on Windows host: 
https://learn.microsoft.com/en-us/windows/win32/api/winbase/ns-winbase-file_id_info).
However, Windows does not provide a function to get file information by file ID.
That means, for anytime of seeking directory, 9pfs need to do the following 
sequence work to locate a name entry:

1. findfirst
2. CreateFile to get file handle
3. GetFileInformationByHandleEx to get file ID 
(https://learn.microsoft.com/en-us/windows/win32/api/minwinbase/ne-minwinbase-file_info_by_handle_class)
4. Close file handle and return if the file ID is match
5. findnext
6. repeat to step #2

Windows does not short file name entry by file ID and the file ID is 128-bit 
integer.
When there are many entries in directory, seeking directory will cause a very 
bad performance.

So I think store all name entries would be better than store all file ID.


> >
> >
> > Thanks
> > Guohuai
> >
> >
> > > > +
> > > > +    if (errno == ENOENT) {
> > > > +        /* No more matching files could be found, clean errno */
> > > > +        errno = 0;
> > > > +    } else {
> > > > +        err = errno;
> > > > +        goto out;
> > > > +    }
> > > > +
> > > > +    stream->total_entries = index;
> > > > +    stream->current = QSLIST_FIRST(&stream->head);
> > > > +
> > > > +out:
> > > > +    if (err != 0) {
> > > > +        errno = err;
> > > > +        /* free whole list */
> > > > +        if (stream != NULL) {
> > > > +            QSLIST_FOREACH_SAFE(dir_entry, &stream->head, node, next)
> {
> > > > +                QSLIST_REMOVE(&stream->head, dir_entry,
> > > > +dir_win32_entry,
> > > node);
> > > > +                g_free(dir_entry);
> > > > +            }
> > > > +            g_free(stream);
> > > > +            stream = NULL;
> > > > +        }
> > > > +    }
> > > > +
> > > > +    /* after cached all entries, this handle is useless */
> > > > +    if (dd_handle != -1) {
> > > > +        _findclose(dd_handle);
> > > > +    }
> > > > +
> > > > +    if (hDir != INVALID_HANDLE_VALUE) {
> > > > +        CloseHandle(hDir);
> > > > +    }
> > > > +
> > > > +    return (DIR *)stream;
> > > > +}
> > > > +
> > > > +/*
> > > > + * closedir_win32 - close a directory
> > > > + *
> > > > + * This function closes directory and free all cached resources.
> > > > + */
> > > > +int closedir_win32(DIR *pDir)
> > > > +{
> > > > +    struct dir_win32 *stream = (struct dir_win32 *)pDir;
> > > > +    struct dir_win32_entry *dir_entry;
> > > > +    struct dir_win32_entry *next;
> > > > +
> > > > +    if (stream == NULL) {
> > > > +        errno = EBADF;
> > > > +        return -1;
> > > > +    }
> > > > +
> > > > +    /* free all resources */
> > > > +
> > > > +    QSLIST_FOREACH_SAFE(dir_entry, &stream->head, node, next) {
> > > > +        QSLIST_REMOVE(&stream->head, dir_entry, dir_win32_entry,
> node);
> > > > +        g_free(dir_entry);
> > > > +    }
> > > > +
> > > > +    g_free(stream);
> > > > +
> > > > +    return 0;
> > > > +}
> > > > +
> > > > +/*
> > > > + * readdir_win32 - read a directory
> > > > + *
> > > > + * This function reads a directory entry from cached entry list.
> > > > + */
> > > > +struct dirent *readdir_win32(DIR *pDir) {
> > > > +    struct dir_win32 *stream = (struct dir_win32 *)pDir;
> > > > +
> > > > +    if (stream == NULL) {
> > > > +        errno = EBADF;
> > > > +        return NULL;
> > > > +    }
> > > > +
> > > > +    if (stream->offset >= stream->total_entries) {
> > > > +        /* reach to the end, return NULL without set errno */
> > > > +        return NULL;
> > > > +    }
> > > > +
> > > > +    memcpy(stream->dd_dir.d_name,
> > > > +           stream->current->dd_data.name,
> > > > +           sizeof(stream->dd_dir.d_name));
> > > > +
> > > > +    /* Windows does not provide inode number */
> > > > +    stream->dd_dir.d_ino = 0;
> > > > +    stream->dd_dir.d_reclen = 0;
> > > > +    stream->dd_dir.d_namlen = strlen(stream->dd_dir.d_name);
> > > > +
> > > > +    stream->offset++;
> > > > +    stream->current = QSLIST_NEXT(stream->current, node);
> > > > +
> > > > +    return &stream->dd_dir;
> > > > +}
> > > > +
> > > > +/*
> > > > + * rewinddir_win32 - reset directory stream
> > > > + *
> > > > + * This function resets the position of the directory stream to
> > > > +the
> > > > + * beginning of the directory.
> > > > + */
> > > > +void rewinddir_win32(DIR *pDir)
> > > > +{
> > > > +    struct dir_win32 *stream = (struct dir_win32 *)pDir;
> > > > +
> > > > +    if (stream == NULL) {
> > > > +        errno = EBADF;
> > > > +        return;
> > > > +    }
> > > > +
> > > > +    stream->offset = 0;
> > > > +    stream->current = QSLIST_FIRST(&stream->head);
> > > > +
> > > > +    return;
> > > > +}
> > > > +
> > > > +/*
> > > > + * seekdir_win32 - set the position of the next readdir() call in
> > > > +the directory
> > > > + *
> > > > + * This function sets the position of the next readdir() call in
> > > > +the directory
> > > > + * from which the next readdir() call will start.
> > > > + */
> > > > +void seekdir_win32(DIR *pDir, long pos) {
> > > > +    struct dir_win32 *stream = (struct dir_win32 *)pDir;
> > > > +    uint32_t index;
> > > > +
> > > > +    if (stream == NULL) {
> > > > +        errno = EBADF;
> > > > +        return;
> > > > +    }
> > > > +
> > > > +    if (pos < -1) {
> > > > +        errno = EINVAL;
> > > > +        return;
> > > > +    }
> > > > +
> > > > +    if (pos == -1 || pos >= (long)stream->total_entries) {
> > > > +        /* seek to the end */
> > > > +        stream->offset = stream->total_entries;
> > > > +        return;
> > > > +    }
> > > > +
> > > > +    if (pos - (long)stream->offset == 0) {
> > > > +        /* no need to seek */
> > > > +        return;
> > > > +    }
> > > > +
> > > > +    /* seek position from list head */
> > > > +
> > > > +    stream->current = QSLIST_FIRST(&stream->head);
> > > > +
> > > > +    for (index = 0; index < (uint32_t)pos; index++) {
> > > > +        stream->current = QSLIST_NEXT(stream->current, node);
> > > > +    }
> > > > +    stream->offset = index;
> > > > +
> > > > +    return;
> > > > +}
> > > > +
> > > > +/*
> > > > + * telldir_win32 - return current location in directory
> > > > + *
> > > > + * This function returns current location in directory.
> > > > + */
> > > > +long telldir_win32(DIR *pDir)
> > > > +{
> > > > +    struct dir_win32 *stream = (struct dir_win32 *)pDir;
> > > > +
> > > > +    if (stream == NULL) {
> > > > +        errno = EBADF;
> > > > +        return -1;
> > > > +    }
> > > > +
> > > > +    if (stream->offset > stream->total_entries) {
> > > > +        return -1;
> > > > +    }
> > > > +
> > > > +    return (long)stream->offset;
> > > > +}
> > > >
> > >
> >
> >
> >
> 
> 




reply via email to

[Prev in Thread] Current Thread [Next in Thread]