Question

The Wikipedia page explaining what a variable-length array (VLA) is lists the following C99 example code:

/*
 * Reads n values with read_val() into heap storage and returns the result of
 * process() on them.  The storage is reached through a pointer to an array of
 * n floats, so each element is written as (*vals)[i].
 * NOTE: the malloc() result is used without a NULL check.
 */
float read_and_process(int n)
{
    float (*vals)[n] = malloc(sizeof(float[n]));
    int idx = 0;

    while (idx < n) {
        (*vals)[idx] = read_val();
        ++idx;
    }

    /* *vals has type float[n]; as an argument it converts to float *. */
    float result = process(n, *vals);

    free(vals);
    return result;
}

并声称:

通过使用指向数组的指针,可以在动态存储上获得类似 VLA 的语法。

该页面上给出的真正的 C99 VLA 代码是:

/*
 * Reads n values with read_val() into a variable-length array local to this
 * function and returns the result of process() on them.
 */
float read_and_process(int n)
{
    float vals[n];
    int idx;

    for (idx = 0; idx < n; idx++) {
        vals[idx] = read_val();
    }

    float result = process(n, vals);
    return result;
}

这段代码很可能会在栈上创建数组,但这并没有保证(C 标准没有规定 VLA 必须存放在哪里),而且这也无关紧要,只要在数组离开作用域时内存能够被正确释放即可。

Wikipedia 页面使用了 `malloc()` 和 `free()`,因此内存将位于 `malloc()` 分配内存的位置,通常是堆。但这里的 `*vals` 有什么作用?

我的问题是:替代版本如何不同于以下代码:

/*
 * Reads n values with read_val() into a heap buffer addressed through a plain
 * float pointer, returns the result of process() on them, and frees the buffer.
 * NOTE: the malloc() result is used without a NULL check.
 */
float read_and_process(int n)
{
    float *vals = malloc(n * sizeof *vals);
    int idx = 0;

    while (idx < n) {
        vals[idx] = read_val();
        ++idx;
    }

    float result = process(n, vals);
    free(vals);
    return result;
}

在真正的 VLA 版本中,`process()` 难道不也只是得到一个指向数组的指针(和上面的版本一样),并且必须依赖 `n` 来得知通过该指针可以访问多少个元素吗?那么,使用 `(*val)[n]` 然后传递 `*val` 的意义何在?

Update

我们快速补上 `process()`:

/*
 * Placeholder for the routine called by read_and_process(): it receives the
 * element count n and a pointer to the first float.  The body is elided in
 * the post, so nothing is returned here.
 */
float process( int n, float * vals )
{
    // ...
}

无论用上述三个版本中的哪一个来调用这个函数,对它来说有什么变化吗?在所有三种情况下,它得到的都是一个 float 指针,指向 n 个 float 值中的第一个。这些值位于栈上还是堆上都无关紧要。在两个 `malloc()` 版本中,它们都位于堆上,那么这两个 malloc 版本之间的区别是什么?

是的,我知道 Wikipedia 的替代版本和我的替代版本之间存在 `(*vals)` 这一差别,但即便如此,这与整个代码示例又有什么关系?您能否给我展示一个 `process()` 的代码示例,使这一差别在其中真正起作用?

Answer 1

这种写法在把二维数组传递给函数时具有优势。

I wouldn't call it a VLA. To me, a VLA is:

/* Declares a true VLA: an automatic array whose length is the runtime value n. */
void caller(int n)
{
    float arr[n];
}

I'd (loosely) call it a pointer to an array with n elements:

/*
 * Demonstrates heap storage accessed through a pointer to an array of n
 * floats.  Both buffers are released before returning so the example does not
 * leak.
 */
void
caller(int n)
{

    /* Room for a single row of n floats. */
    float (*arr)[n] = malloc(sizeof(float) * n);
    /* Room for n rows of n floats; arr2d[i][j] addresses row i, column j. */
    float (*arr2d)[n] = malloc(sizeof(float) * n * n);

    /* free(NULL) is a no-op, so no guard is needed if an allocation failed. */
    free(arr2d);
    free(arr);
}

如果没有这种特殊指针,我们就必须手工进行索引计算:

/*
 * Fills a w-by-h grid, stored as one flat float buffer, so that every cell
 * holds its own linear index.  The 2D position is flattened by hand as
 * (row * w) + col.
 */
void
process(int w,int h,float *arr)
{
    int row = 0;

    while (row < h) {
        for (int col = 0;  col < w;  col++) {
            int flat = (row * w) + col;

            arr[flat] = flat;
        }
        row++;
    }
}

With the special pointer, we pass along dimensions:

/*
 * Fills a w-by-h grid so that every cell holds its own linear index.  arr is
 * a pointer to rows of w floats, so the compiler performs the row arithmetic
 * behind arr[row][col].
 */
void
process(int w,int h,float (*arr)[w])
{
    int row = 0;

    while (row < h) {
        for (int col = 0;  col < w;  col++) {
            arr[row][col] = (row * w) + col;
        }
        row++;
    }
}

例如,这里是我最近的一个回答,我在其中使用了“真正的”二维数组:

注意,我们也可以把函数写成类似的形式:

/*
 * Fills an h-by-w grid so that every cell holds its own linear index.  The
 * parameter is written with full array syntax, arr[h][w], and is indexed as
 * arr[row][col].
 */
void
process(int w,int h,float arr[h][w])
{
    for (int row = 0;  row < h;  row++) {
        int col = 0;

        while (col < w) {
            arr[row][col] = (row * w) + col;
            col++;
        }
    }
}

但我认为这非驴非马。在我看来,它“看起来像”一个 VLA,但实际上不是:由于它是函数参数,它只是用来描述调用者传入的指针。

然而,尽管我们可以把这种语法用于函数参数,却不能在同一个函数内部使用它。我们需要这种特殊指针:

/*
 * Allocates a w-by-h float grid on the heap, fills every cell with its linear
 * index through a pointer to rows of w floats, and frees the grid again.
 * Returns without doing anything if the allocation fails.
 */
void
process(int w,int h)
{

    float (*arr)[w] = malloc(sizeof(float) * w * h);

    /* Writing through a NULL pointer would be undefined behavior. */
    if (arr == NULL)
        return;

    for (int y = 0;  y < h;  ++y) {
        for (int x = 0;  x < w;  ++x)
            arr[y][x] = (y * w) + x;
    }

    free(arr);
}

我们程序员不希望向每一个要操作数组的函数都传递三个参数。

我们可以创建一个包含宽度、高度和数组指针的 `struct`。这样,我们只需传递指向该 struct 的指针:

#include <stdlib.h>

/* Descriptor for a 2D float array: its dimensions plus the element storage. */
struct arr2d {
    int w;          /* width: number of elements per row */
    int h;          /* height: number of rows */
    float *data;    /* storage for w * h elements, allocated by new2d() */
};

/*
 * Fills the array with each cell's linear index, computing the flat offset
 * by hand and reading the width from the descriptor on every access.
 */
void
process1(struct arr2d *arr)
{
    int row = 0;

    while (row < arr->h) {
        for (int col = 0;  col < arr->w;  col++) {
            arr->data[(row * arr->w) + col] = (row * arr->w) + col;
        }
        row++;
    }
}

/*
 * Fills the array with each cell's linear index, viewing the flat storage as
 * rows of arr->w floats so that cells can be addressed as grid[row][col].
 * Loop bounds are still read from the descriptor.
 */
void
process2(struct arr2d *arr)
{
    float (*grid)[arr->w] = (void *) arr->data;
    int row = 0;

    while (row < arr->h) {
        for (int col = 0;  col < arr->w;  col++) {
            grid[row][col] = (row * arr->w) + col;
        }
        row++;
    }
}

/*
 * Fills the array with each cell's linear index.  Width and height are copied
 * into locals first, and the storage is viewed as rows of that width.
 */
void
process3(struct arr2d *arr)
{
    int width = arr->w;
    int height = arr->h;
    float (*grid)[width] = (void *) arr->data;
    int row = 0;

    while (row < height) {
        for (int col = 0;  col < width;  col++) {
            grid[row][col] = (row * width) + col;
        }
        row++;
    }
}

/*
 * Allocates a descriptor and storage for a w-by-h array.
 *
 * Returns the new descriptor, or NULL if either allocation fails (nothing is
 * leaked in that case).  The caller owns the result and must free both
 * arr->data and the descriptor itself.
 */
struct arr2d *
new2d(int w,int h)
{
    struct arr2d *arr = malloc(sizeof(*arr));

    if (arr == NULL)
        return NULL;

    arr->w = w;
    arr->h = h;

    arr->data = malloc(sizeof(arr->data[0]) * w * h);

    /* malloc(0) may legitimately return NULL, so only a non-empty request
       that yields NULL counts as a failure. */
    if (arr->data == NULL && w != 0 && h != 0) {
        free(arr);
        return NULL;
    }

    return arr;
}

BENCHMARKS:

我很想知道各种函数和索引方法的速度如何。为了尽量减少副作用、只衡量各种索引方法的影响,我把 `float` 改成了 `int`,并增加了一个简单的 `idx` 变量。

这里是基准代码:

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/* Descriptor for a 2D int array: its dimensions plus the element storage. */
struct arr2d {
    int w;          /* width: number of elements per row */
    int h;          /* height: number of rows */
    int *data;      /* storage for w * h elements, allocated by new2d() */
};

/*
 * Benchmark variant 1 (the baseline): stores a running counter into every
 * cell, computing the flat offset (y * arr->w) + x by hand and reading the
 * dimensions from the descriptor on each use.
 */
void
process1(struct arr2d *arr)
{

    int idx = 0;    /* running counter; advanced once per cell */
    for (int y = 0;  y < arr->h;  ++y) {
        for (int x = 0;  x < arr->w;  ++x, ++idx)
            arr->data[(y * arr->w) + x] = idx;
    }
}

/*
 * Benchmark variant 2: stores a running counter into every cell, viewing the
 * flat storage as rows of arr->w ints so cells are addressed as data[y][x].
 * Loop bounds are still read from the descriptor.
 */
void
process2(struct arr2d *arr)
{
    /* Reinterpret the flat buffer as a pointer to rows of arr->w ints. */
    int (*data)[arr->w] = (void *) arr->data;

    int idx = 0;    /* running counter; advanced once per cell */
    for (int y = 0;  y < arr->h;  ++y) {
        for (int x = 0;  x < arr->w;  ++x, ++idx)
            data[y][x] = idx;
    }
}

/*
 * Benchmark variant 3: like process2, but the width and height are copied
 * into locals before the loops and used for both the row type and the bounds.
 */
void
process3(struct arr2d *arr)
{
    int w = arr->w;
    int h = arr->h;
    /* Reinterpret the flat buffer as a pointer to rows of w ints. */
    int (*data)[w] = (void *) arr->data;

    int idx = 0;    /* running counter; advanced once per cell */
    for (int y = 0;  y < h;  ++y) {
        for (int x = 0;  x < w;  ++x, ++idx)
            data[y][x] = idx;
    }
}

/*
 * Benchmark variant 4: like process3, but a pointer to the current row is
 * taken once per outer iteration and the inner loop indexes that row.
 */
void
process4(struct arr2d *arr)
{
    int w = arr->w;
    int h = arr->h;
    /* Reinterpret the flat buffer as a pointer to rows of w ints. */
    int (*data)[w] = (void *) arr->data;

    int idx = 0;    /* running counter; advanced once per cell */
    for (int y = 0;  y < h;  ++y) {
        int *row = data[y];     /* first element of row y */
        for (int x = 0;  x < w;  ++x, ++idx)
            row[x] = idx;
    }
}

/*
 * Allocates a descriptor and storage for a w-by-h array.
 *
 * Returns the new descriptor, or NULL if either allocation fails (nothing is
 * leaked in that case).  Release the result with del2d().
 */
struct arr2d *
new2d(int w,int h)
{
    struct arr2d *arr = malloc(sizeof(*arr));

    if (arr == NULL)
        return NULL;

    arr->w = w;
    arr->h = h;

    arr->data = malloc(sizeof(arr->data[0]) * w * h);

    /* malloc(0) may legitimately return NULL, so only a non-empty request
       that yields NULL counts as a failure. */
    if (arr->data == NULL && w != 0 && h != 0) {
        free(arr);
        return NULL;
    }

    return arr;
}

/*
 * Releases an array descriptor and its element storage.
 * Passing NULL is allowed and does nothing, mirroring free().
 */
void
del2d(struct arr2d *arr)
{

    if (arr == NULL)
        return;

    free(arr->data);
    free(arr);
}

/*
 * Returns the current CLOCK_MONOTONIC reading as fractional seconds
 * (whole seconds plus nanoseconds scaled by 1e9).
 */
double
tscgetf(void)
{
    struct timespec now;

    clock_gettime(CLOCK_MONOTONIC,&now);

    double frac = now.tv_nsec;

    frac /= 1e9;

    return frac + now.tv_sec;
}

/*
 * Benchmarks one function: passes the local `arr`, the function itself, and
 * its name as a string to dofnc().  The trailing backslash is required so the
 * expansion on the next line belongs to the macro.
 */
#define DOFNC(_fnc) \
    dofnc(arr,_fnc,#_fnc)

/* Best time of the first function measured since the last reset; 0 = unset. */
double tscold;

/*
 * Times fnc(arr) and prints one result line.
 *
 * Runs 5 trials of 100 calls each and keeps the fastest trial.  The first
 * function measured after tscold is reset to 0 becomes the baseline; the line
 * printed is "<best seconds> <baseline / best>x <sym>".
 *
 * arr  array handed to every call of fnc
 * fnc  function under test
 * sym  name printed for the function
 */
void
dofnc(struct arr2d *arr,void (*fnc)(struct arr2d *arr),const char *sym)
{
    /* Start far above any real measurement so the first trial always wins. */
    double tscbest = 1LL << 60;

    for (int iterbest = 0;  iterbest < 5;  ++iterbest) {
        double tscbeg = tscgetf();

        for (int iterfnc = 0;  iterfnc < 100;  ++iterfnc)
            fnc(arr);

        double tscend = tscgetf();
        tscend -= tscbeg;

        if (tscend < tscbest)
            tscbest = tscend;
    }

    if (tscold == 0)
        tscold = tscbest;

    printf("%.9f %.3fx %s\n",tscbest,tscold / tscbest,sym);
}

void
dotest(int w,int h)
{
    struct arr2d *arr = new2d(w,h);

    printf("
");
    printf("dotest: w=%d h=%d
",w,h);
    tscold = 0;
    DOFNC(process1);
    DOFNC(process2);
    DOFNC(process3);
    DOFNC(process4);

    del2d(arr);
}

/* Runs the benchmark for a fixed list of (width, height) pairs, in order. */
int
main(void)
{
    static const int dims[][2] = {
        { 100, 50 },
        { 50, 100 },
        { 100, 1000 },
        { 1000, 100 },
        { 10000, 10 },
    };
    const int count = (int) (sizeof(dims) / sizeof(dims[0]));

    for (int i = 0;  i < count;  ++i)
        dotest(dims[i][0],dims[i][1]);

    return 0;
}

我在三个优化等级下运行了基准测试:`-O0`、`-O2` 和 `-O3`。基准测试以 `process1` 作为基线。结果如下:

--------------------------------------------------------------------------------
optimize -O0

dotest: w=100 h=50
0.003704862 1.000x process1
0.001824568 2.031x process2
0.001825894 2.029x process3
0.001562364 2.371x process4

dotest: w=50 h=100
0.003504016 1.000x process1
0.001778585 1.970x process2
0.001773583 1.976x process3
0.001588978 2.205x process4

dotest: w=100 h=1000
0.074285246 1.000x process1
0.036595154 2.030x process2
0.034042392 2.182x process3
0.032144590 2.311x process4

dotest: w=1000 h=100
0.070179764 1.000x process1
0.033196725 2.114x process2
0.033003560 2.126x process3
0.031031687 2.262x process4

dotest: w=10000 h=10
0.069901074 1.000x process1
0.033057440 2.115x process2
0.032876559 2.126x process3
0.028973187 2.413x process4

--------------------------------------------------------------------------------
optimize -O2

dotest: w=100 h=50
0.000613288 1.000x process1
0.000389683 1.574x process2
0.000377139 1.626x process3
0.000377149 1.626x process4

dotest: w=50 h=100
0.000575807 1.000x process1
0.000363035 1.586x process2
0.000346855 1.660x process3
0.000346888 1.660x process4

dotest: w=100 h=1000
0.011875981 1.000x process1
0.007884906 1.506x process2
0.007599107 1.563x process3
0.007596456 1.563x process4

dotest: w=1000 h=100
0.011400999 1.000x process1
0.007180440 1.588x process2
0.007008333 1.627x process3
0.006975097 1.635x process4

dotest: w=10000 h=10
0.011140902 1.000x process1
0.007144627 1.559x process2
0.006928350 1.608x process3
0.006919723 1.610x process4

--------------------------------------------------------------------------------
optimize -O3

dotest: w=100 h=50
0.000584456 1.000x process1
0.000534780 1.093x process2
0.000094556 6.181x process3
0.000096305 6.069x process4

dotest: w=50 h=100
0.000564249 1.000x process1
0.000377867 1.493x process2
0.000107199 5.264x process3
0.000140041 4.029x process4

dotest: w=100 h=1000
0.011978588 1.000x process1
0.010904583 1.098x process2
0.002855820 4.194x process3
0.002877185 4.163x process4

dotest: w=1000 h=100
0.011205538 1.000x process1
0.010515848 1.066x process2
0.002302689 4.866x process3
0.002076997 5.395x process4

dotest: w=10000 h=10
0.011123261 1.000x process1
0.010506258 1.059x process2
0.001983195 5.609x process3
0.001975420 5.631x process4

我原本预计 `process3` 会更快,而 `process4` 还会再略快一些。

出乎意料的是:

The amount of speedup of process3 with -O3.
For the other optimization levels, it was 1.5x-2.0x faster than process1.
For -O3, it was 4x-6x faster than process1.
process4 was slightly slower than process3 in some instances.

查看了一些反汇编代码后发现,使用 `-O3` 时,优化器能够对 `process3` 和 `process4` 使用 x86 SIMD 指令。

我猜这只是表明,“优化”可能带来一些令人惊讶的结果。

Answer 2

The difference is the type of vals.

在替代版本中,`vals` 的类型为 `float (*)[n]`,而在您的版本中,`vals` 的类型为 `float *`。两者是不同的类型。

在您的版本中,对 `vals` 加 `1` 会得到动态分配数组中下一个元素的地址;而在替代版本中,对 `vals` 加 `1` 会得到越过全部 `n` 个元素之后的地址。

float (*vals)[n]

`vals` 是指向含 `n` 个 `float` 的数组的指针,即指向 `float [n]` 的指针。

When you dereference vals, i.e. *vals, it gives the type float [n], which, when passed as an argument to a function, is converted to a pointer to the first element of the array of float. This happens because, in C, an expression of type "array of type" is converted to a "pointer to type" that points to the initial element of the array object (there are a few exceptions to this rule).
This is the reason *vals is passed to the process() function in the replacement version.

为便于理解:

    float * vals = .....

    +---+---+---+---+---+
    |   |   |   |   |   |
    +---+---+---+---+---+
      ^   ^
      |   |
    vals  |
          |
       (vals+1)


    float (*vals)[n] = .....

vals ---+        (vals+1)---+
        |                   |
        v                   v
        +---+---+---+---+---+
        |   |   |   |   |   |
        +---+---+---+---+---+
        \__________________/
                  |
         This whole memory region treated as 
         one element to which vals pointing to.

Answer 3

我的问题是:替代版本与以下代码有何不同:

It isn't; it just has more cumbersome syntax in this specific case. The thing with malloc is that the returned data has no type but guarantees that you can access the chunk of memory as an array of "stuff". The pointer type used to access that chunk is irrelevant; C only cares about the dereferenced type when doing "lvalue access".

无论您是像第一个例子那样写 `(*vals)[i] = read_val();`,还是像第三个例子那样写 `vals[i] = read_val();`,用于左值访问的对象类型都是 `float`,而实际上重要的只有这一点。

在真正的 VLA 版本中,`process()` 难道不也只是像上面的版本那样得到一个指向数组的指针,并且必须依赖 `n` 来得知通过该指针可以访问多少个元素吗?

由于 C 不对数组进行边界检查,确实如此。无论数据是否为 VLA,C 都不会阻止你访问 `n` 范围之外的数据。

So what is the point of using (*val)[n] and then passing *val instead?

在这个具体例子中,没有任何意义。通常,函数可能会有一个数组形参,但由于数组作为形参时会被调整为指向首元素的指针,所以无论我们写成 `float* arr`、`float arr[n]`(VLA)、`float arr[]` 还是 `float arr[666]` 都没有区别。所有这些最终都归结为 `float*`。

VLA 语法的优势体现在传递多维数组的时候。如果我们这样写:

void func (size_t x, size_t y, int arr[x][y])

那么,这个数组的第一个元素是 `int [y]`。指向该元素的指针是 `int(*)[y]`。因此,上述写法等价于:

void func (size_t x, size_t y, int (*arr)[y])

无论是 `int arr[x][y]` 还是 `int (*arr)[y]`,都让我们能够使用可读性好的 `arr[i][j]` 语法来访问数组。

然而,如果我们使用一个展平的一维数组(在不支持 VLA 的 C89 中有时会这样做):

void func (int* arr, size_t x, size_t y)

那么,我们就必须通过繁琐且容易出错的 `arr[i*y + j]` 形式来访问。

Update

友情链接