qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH] block/file-posix: fix the wrong result of find_


From: 王彥傑
Subject: Re: [Qemu-devel] [PATCH] block/file-posix: fix the wrong result of find_allocation() in macOS.
Date: Tue, 11 Sep 2018 10:01:50 +0800
User-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:60.0) Gecko/20100101 Thunderbird/60.0

This is the program I used to check the behavior of SEEK_DATA and SEEK_HOLE.

#define _FILE_OFFSET_BITS 64
#define _LARGEFILE64_SOURCE 1
#include <sys/types.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <stdint.h>
#include <fcntl.h>
#include <errno.h>
#include <stdio.h>

/*
 * Describe the extent that begins at `start` using lseek(2) with
 * SEEK_DATA and SEEK_HOLE.
 *
 * The caller (see main()) only ever passes a `start` that sits at the
 * beginning of a data region or of a hole, which is why this helper works
 * with the macOS and Solaris flavours of lseek as well.
 *
 * On success 0 is returned and both outputs are written:
 *   - start is in a hole:  *hole = start, *data = offset found by SEEK_DATA
 *   - start is in data:    *data = start, *hole = offset found by SEEK_HOLE
 *
 * On failure a negative errno value is returned and the outputs are left
 * untouched: -errno when lseek itself fails, -EIO when lseek reports an
 * offset before `start` or when SEEK_HOLE does not advance past `start`.
 */
static int find_allocation(int fd, off_t start, off_t *data, off_t *hole)
{
    off_t next_data = lseek(fd, start, SEEK_DATA);

    if (next_data < 0) {
        return -errno;
    }
    if (next_data < start) {
        /* lseek must never move backwards */
        return -EIO;
    }
    if (next_data != start) {
        /* start is at the beginning of a hole that ends at next_data */
        *hole = start;
        *data = next_data;
        return 0;
    }

    /* start is at the beginning of a data region; find where it ends */
    off_t next_hole = lseek(fd, start, SEEK_HOLE);

    if (next_hole < 0) {
        return -errno;
    }
    if (next_hole <= start) {
        /* moved backwards, or claims a hole where SEEK_DATA saw data */
        return -EIO;
    }

    *data = start;
    *hole = next_hole;
    return 0;
}

/*
 * Probe which flavour of lseek(2) SEEK_DATA / SEEK_HOLE the platform and
 * the filesystem holding argv[1] implement.
 *
 * The file is first walked extent by extent with find_allocation() to
 * locate (a) the first data region longer than one byte and (b) the
 * trailing hole, if any.  Then:
 *   - SEEK_DATA is issued one byte into that data region to see whether it
 *     stays put ("Linux") or jumps to the next region / ENXIO ("macOS");
 *   - SEEK_HOLE is issued at the trailing hole to see whether it stays put
 *     ("Linux") or reports end-of-file ("Solaris").
 *
 * Returns 1 on a usage error, when the file cannot be opened or sized,
 * when it is smaller than 2 bytes, or when the extent walk fails;
 * returns 0 otherwise (including when a probe itself prints "Error").
 */
int main(int argc, const char *argv[])
{
    off_t first_data = -1, trailing_hole = -1;
    off_t cursor = 0;

    if (argc != 2) {
        fprintf(stderr, "Usage: %s path\n", argv[0]);
        return 1;
    }

    int fd = open(argv[1], O_RDONLY);
    if (fd < 0) {
        perror("Cannot open the file");
        return 1;
    }

    off_t filesize = lseek(fd, 0, SEEK_END);
    if (filesize < 0) {
        /* report before close() so errno still belongs to lseek */
        perror("Cannot get the file size");
        close(fd);
        return 1;
    }

    if (filesize < 2) {
        fprintf(stderr, "Filesize is too small.\n");
        close(fd);
        return 1;
    }

    printf("Filesize: %lld\n", (long long)filesize);

    while (cursor < filesize) {
        off_t data, hole;
        int res;

        /* cursor is always at the beginning of a data region or a hole */
        res = find_allocation(fd, cursor, &data, &hole);

        if (res < 0 && res != -ENXIO) {
            fprintf(stderr, "The filesystem or platform being checked does not support SEEK_DATA or SEEK_HOLE.\n");
            /*
             * find_allocation() may fail with -EIO without touching errno,
             * and fprintf() may have changed it; make perror() describe
             * the error that was actually returned.
             */
            errno = -res;
            perror("");
            close(fd);
            return 1;
        }

        if (res == -ENXIO) {
            /* we are at the trailing hole */
            trailing_hole = cursor;
            break;
        }

        if (data == cursor && (hole - data) > 1) {
            /* the length of the data region must be greater than 1. */
            if (first_data == -1) {
                first_data = cursor;
            }
        }

        /* step to the start of the next extent */
        if (data == cursor) {
            cursor = hole;
        } else {
            cursor = data;
        }
    }

    if (first_data >= 0) {
        printf("Checking for SEEK_DATA by using the data region at %lld... ",
               (long long)first_data);
        /* first_data plus 1 makes the offset in the middle of a data region */
        off_t offs = lseek(fd, first_data + 1, SEEK_DATA);
        /* capture errno before any stdio call can overwrite it */
        int saved_errno = errno;

        if (offs < 0 && saved_errno != ENXIO) {
            printf("Error\n");
            errno = saved_errno;
            perror(" Msg");
        } else if (offs < 0 || offs > first_data + 1) {
            /*
             * offs is set to the next data region, or lseek failed with
             * ENXIO because there is none. This is macOS's behavior
             */
            printf("macOS\n");
        } else if (offs == first_data + 1) {
            printf("Linux\n");
        } else {
            printf("Unknown behavior\n");
        }
    } else {
        fprintf(stderr, "There is no data region which is suitable to be checked.\n");
    }

    if (trailing_hole >= 0) {
        printf("Checking for SEEK_HOLE by using the trailing hole at %lld... ",
               (long long)trailing_hole);
        off_t offs = lseek(fd, trailing_hole, SEEK_HOLE);

        if (offs < 0) {
            /* capture errno before printf() can overwrite it */
            int saved_errno = errno;

            printf("Error\n");
            errno = saved_errno;
            perror(" Msg");
        } else if (offs == filesize) {
            /* offs is set to EOF. This is Solaris' behavior */
            printf("Solaris\n");
        } else if (offs == trailing_hole) {
            printf("Linux\n");
        } else {
            printf("Unknown behavior\n");
        }
    } else {
        fprintf(stderr, "There is no trailing hole which is suitable to be checked.\n");
    }

    close(fd);
    return 0;
}



Peter Maydell - 2018/9/8 11:34 PM:
> On 8 September 2018 at 15:15, Yan-Jie Wang <address@hidden> wrote:
>> In macOS, lseek with SEEK_DATA behaves differently.
>> It seeks to the next data region even though offset is in the middle of
>> a data region. In addition, there may be many data regions without any
>> hole among them, like this: |---Data---|---Data---|
>>
>> Because of this, qemu-img convert with raw images as input may create
>> corrupted images in macOS especially for large files, and qemu-img
>> map may also report wrong things. This patch fixes these undesired
>> behaviors.
> 
> Hi. I have two general questions here:
> (1) is this behaviour of SEEK_DATA specific to macOS, or do the
> other BSDs (FreeBSD, OpenBSD, NetBSD) also have it ?
I installed FreeBSD in VirtualBox and checked the behavior of lseek there.
SEEK_DATA behaves the same as it does on Linux.

> (2) is there a way to determine which flavour of SEEK_DATA we
> have as a configure-time test rather than having to hardcode
> an OS-specific #ifdef ?
macOS can be installed on either an HFS+ or an APFS filesystem, but only APFS
supports SEEK_DATA and SEEK_HOLE.

If qemu is built on an HFS+ filesystem, the behavior of SEEK_DATA and SEEK_HOLE
cannot be detected at configure time: lseek with SEEK_DATA or SEEK_HOLE returns
an error when the file being checked is on an HFS+ filesystem. (I verified this
by formatting my USB thumb drive as HFS+ and running the program provided at the
top of this email on a file located on that drive.)

> 
> thanks
> -- PMM
> 



reply via email to

[Prev in Thread] Current Thread [Next in Thread]