Filling three arrays at once.

Hey, I wanted to know if there is a more efficient way to fill three arrays of different size at once than what I have here.


#include <iostream>
#include <algorithm>

int main()
{
int *array1= new int[100];
int *array2= new int[10000];
int *array3= new int[1000000];
const int size= 1000000;

for (int i=0, j=size-1; i<size-1; i++, j--)
{
if (i<100)
{
array1[i]=i;
}
if(i<10000) { array2[j]=i; }
array3[i]=i;
}

std::random_shuffle(&array1[0], &array1[99]);
std::random_shuffle(&array2[0], &array2[9999]);
std::random_shuffle(&array3[0], &array3[size-1]);

delete [] array1;
delete [] array2;
delete [] array3;

}


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
void fill_array(int *array, size_t n){
    for (size_t i = n; i--;)
        array[i] = i;
    std::random_shuffle(array, array + n);
}

int main(){
    const size_t array1_size = 100;
    const size_t array2_size = 10000;
    const size_t array3_size = 1000000;
    
    int *array1 = new int[array1_size];
    int *array2 = new int[array2_size];
    int *array3 = new int[array3_size];
    
    fill_array(array1, array1_size);
    fill_array(array2, array2_size);
    fill_array(array3, array3_size);

    delete [] array1;
    delete [] array2;
    delete [] array3;

}
There's no way to do multiple things at once, and threads don't work well for such small workloads.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
#include <numeric>
#include <memory>

int main()
{
     const std::size_t N1 = 100, N2 = 10'000, N3 = 1'000'000 ;
     static int a1[N1] ;
     static int a2[N2] ;
     static int a3[N3] ;
     
     // fill the largest array with 0, 1, 2 ...
     std::iota( std::begin(a3), std::end(a3), 0 ) ; 
     
     // copy (block move) right size chunks to the smaller arrays
     std::uninitialized_copy( std::begin(a3), std::begin(a3) + N2, std::begin(a2) ) ;
     std::uninitialized_copy( std::begin(a3), std::begin(a3) + N1, std::begin(a1) ) ;
} 

main:
        vmovdqa ymm0, YMMWORD PTR .LC0[rip]
        xor     eax, eax
        vmovdqa ymm1, YMMWORD PTR .LC1[rip]
.L2:
        mov     rdx, rax
        add     rax, 1
        sal     rdx, 5
        vmovdqa YMMWORD PTR main::a3[rdx], ymm0
        vpaddd  ymm0, ymm0, ymm1
        cmp     rax, 124999
        jbe     .L2
        vzeroupper
        xor     eax, eax
        ret
       ...

https://godbolt.org/g/U65h5C
Last edited on
Is it just me, or did the compiler optimize away everything after line 12?
Is the type conversion okay though? size_t to int?
> did the compiler optimize away everything after line 12?

Yes, it did; it generated no block moves at all (calls to the intrinsic memcpy).

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
#include <numeric>
#include <memory>

const std::size_t N1 = 100, N2 = 10'000, N3 = 1'000'000 ;
int a1[N1] ;
int a2[N2] ;
int a3[N3] ;

int main()
{
     // fill the largest array with 0, 1, 2 ...
     std::iota( std::begin(a3), std::end(a3), 0 ) ; 
     
     // copy (block move) right size chunks to the smaller arrays
     std::uninitialized_copy( std::begin(a3), std::begin(a3) + N2, std::begin(a2) ) ;
     std::uninitialized_copy( std::begin(a3), std::begin(a3) + N1, std::begin(a1) ) ;
} 

main:
        lea     r10, [rsp+8]
        and     rsp, -32
        xor     eax, eax
        push    QWORD PTR [r10-8]
        push    rbp
        mov     rbp, rsp
        push    r10
        sub     rsp, 8
        vmovdqa ymm0, YMMWORD PTR .LC0[rip]
        vmovdqa ymm1, YMMWORD PTR .LC1[rip]
.L2:
        mov     rdx, rax
        add     rax, 1
        sal     rdx, 5
        vmovdqa YMMWORD PTR a3[rdx], ymm0
        vpaddd  ymm0, ymm0, ymm1
        cmp     rax, 124999
        jbe     .L2
        mov     edx, 40000
        mov     esi, OFFSET FLAT:a3
        mov     edi, OFFSET FLAT:a2
        vzeroupper
        call    memcpy
        mov     edx, 400
        mov     esi, OFFSET FLAT:a3
        mov     edi, OFFSET FLAT:a1
        call    memcpy
        add     rsp, 8
        xor     eax, eax
        pop     r10
        pop     rbp
        lea     rsp, [r10-8]
        ret
        ...

https://godbolt.org/g/vA2PKq


> Is the type conversion okay though? size_t to int?

For these range of values, it would be fine. Ideally, always use std::size_t for array size.
Last edited on
do you really need 3 arrays? The only tweak I have right this second is that if you don't actually NEED to modify the 3 arrays, you can just fill in the big one and take pointers inside it as the smaller ones. That gives the same result as copying a piece of the big one into the small ones, without the copy. But if you need to change or do things to the arrays, it wouldn't work, as changes affect the big array.


With these sizes const std::size_t N1 = 100, N2 = 10'000, N3 = 1'000'000 ; , it really does not matter how the code is written: the time taken for filling (and shuffling) the large array will thoroughly dominate everything else.

If the sizes of these arrays are of comparable orders of magnitude, there could be a case for trying to make the initialisation faster.
Last edited on
Topic archived. No new replies allowed.