Speeding up some functions.

I have a program im looking at and i am supposed to speed it up, it has a set of functions that rotate and smooth a series of blue and green pictures, if anyone could please help me or give some tips please let me know what i can do. Thanks.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
/* 
 * naive_rotate - The naive baseline version of rotate 
 */
char naive_rotate_descr[] = "naive_rotate: Naive baseline implementation";
void naive_rotate(const int dim, pixel *src, pixel *dst)
{
    int i, j;

    for (i = 0; i < dim; ++i)
        for (j = 0; j < dim; ++j)
            dst[RIDX(dim-1-j, i, dim)] = src[RIDX(i, j, dim)];
}
char rotate_descr[] = "rotate: Current working version";
void rotate(const int dim, pixel *src, pixel *dst)
{
    naive_rotate(dim, src, dst);
}


void register_rotate_functions()
{
    add_rotate_function(&naive_rotate, naive_rotate_descr);
    add_rotate_function(&rotate, rotate_descr);
    /* ... Register additional test functions here */
}


/***************
 * SMOOTH KERNEL
 **************/

/* A struct used to compute averaged pixel value */
typedef struct {
    int red;
    int green;
    int blue;
    int num;
} pixel_sum;

/* Compute min and max of two integers, respectively */
static int min(int a, int b) { return (a < b ? a : b); }
static int max(int a, int b) { return (a > b ? a : b); }

/* 
 * initialize_pixel_sum - Initializes all fields of sum to 0 
 */
static void initialize_pixel_sum(pixel_sum *sum)
{
    sum->red = sum->green = sum->blue = 0;
    sum->num = 0;
    return;
}

/* 
 * accumulate_sum - Accumulates field values of p in corresponding 
 * fields of sum 
 */
static void accumulate_sum(pixel_sum *sum, pixel p)
{
    sum->red += (int) p.red;
    sum->green += (int) p.green;
    sum->blue += (int) p.blue;
    sum->num++;
    return;
}

/* 
 * assign_sum_to_pixel - Computes averaged pixel value in current_pixel 
 */
static void assign_sum_to_pixel(pixel *current_pixel, pixel_sum sum)
{
    current_pixel->red = (unsigned short) (sum.red/sum.num);
    current_pixel->green = (unsigned short) (sum.green/sum.num);
    current_pixel->blue = (unsigned short) (sum.blue/sum.num);
    return;
}

/* 
 * avg - Returns averaged pixel value at (i,j) 
 */
static pixel avg(int dim, const int i, const int j, pixel *src)
{
    int ii, jj;
    pixel_sum sum;
    pixel current_pixel;

    initialize_pixel_sum(&sum);
    for(ii = max(i-1, 0); ii <= min(i+1, dim-1); ++ii)
        for(jj = max(j-1, 0); jj <= min(j+1, dim-1); ++jj)
            accumulate_sum(&sum, src[RIDX(ii, jj, dim)]);

    assign_sum_to_pixel(&current_pixel, sum);
    return current_pixel;
}



/*
 * naive_smooth - The naive baseline version of smooth 
 */
char naive_smooth_descr[] = "naive_smooth: Naive baseline implementation";
void naive_smooth(int dim, pixel *src, pixel *dst)
{
    int i, j;

    for (i = 0; i < dim; ++i)
        for (j = 0; j < dim; ++j)
            dst[RIDX(i, j, dim)] = avg(dim, i, j, src);
}


char smooth_descr[] = "smooth: Current working version";
void smooth(int dim, pixel *src, pixel *dst)
{
    naive_smooth(dim, src, dst);
}

void register_smooth_functions() {
    add_smooth_function(&smooth, smooth_descr);
    add_smooth_function(&naive_smooth, naive_smooth_descr);
    /* ... Register additional test functions here */
}


Run a profiler and see where the delays actually are first.
here are the time tables for them.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
Rotate: Version = naive_rotate: Naive baseline implementation:
Dim		64	128	256	512	1024	Mean
Your CPEs	20.7	20.6	21.5	31.4	210.3
Baseline CPEs	14.7	40.1	46.4	65.9	94.5
Speedup		0.7	1.9	2.2	2.1	0.4	1.2

Rotate: Version = rotate: Current working version:
Dim		64	128	256	512	1024	Mean
Your CPEs	20.7	20.7	28.5	31.5	208.4
Baseline CPEs	14.7	40.1	46.4	65.9	94.5
Speedup		0.7	1.9	1.6	2.1	0.5	1.2

Smooth: Version = smooth: Current working version:
Dim		32	64	128	256	512	Mean
Your CPEs	231.7	231.7	232.0	231.6	233.9
Baseline CPEs	695.0	698.0	702.0	717.0	722.0
Speedup		3.0	3.0	3.0	3.1	3.1	3.0

Smooth: Version = naive_smooth: Naive baseline implementation:
Dim		32	64	128	256	512	Mean
Your CPEs	231.6	230.7	231.9	231.5	233.6
Baseline CPEs	695.0	698.0	702.0	717.0	722.0
Speedup		3.0	3.0	3.0	3.1	3.1	3.0
You need better resolution. Which lines of code are eating the most time?
probably smooth naive and rotate naive are the slowest things, i figured i need to unroll some loops and do some blocking but im not sure how to do it, i have only ever done it with really simple functions. pixel avg needs work too im trying to unroll that now but im not doing it right. i tried combining the two rotate and the two smooth functions together it still works fine but no viable speed up.
Last edited on
This is my new rotate

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
int i, j;
    int ii, jj;

    for(jj=0; jj <  dim; jj+=32)
    for(ii=0; ii <  dim; ii+=32)
        for(j=jj; j <  jj+32; j+=4)
        for(i=ii; i <  ii+32; i+=4) {
            dst[RIDX(dim-1-j,i,dim)] = src[RIDX(i,j,dim)];
            dst[RIDX(dim-1-j,i+1,dim)] = src[RIDX(i+1,j,dim)];
            dst[RIDX(dim-1-j,i+2,dim)] = src[RIDX(i+2,j,dim)];
            dst[RIDX(dim-1-j,i+3,dim)] = src[RIDX(i+3,j,dim)];
            dst[RIDX(dim-1-j-1,i,dim)] = src[RIDX(i,j+1,dim)];
            dst[RIDX(dim-1-j-1,i+1,dim)] = src[RIDX(i+1,j+1,dim)];
            dst[RIDX(dim-1-j-1,i+2,dim)] = src[RIDX(i+2,j+1,dim)];
            dst[RIDX(dim-1-j-1,i+3,dim)] = src[RIDX(i+3,j+1,dim)];
            dst[RIDX(dim-1-j-2,i,dim)] = src[RIDX(i,j+2,dim)];
            dst[RIDX(dim-1-j-2,i+1,dim)] = src[RIDX(i+1,j+2,dim)];
            dst[RIDX(dim-1-j-2,i+2,dim)] = src[RIDX(i+2,j+2,dim)];
            dst[RIDX(dim-1-j-2,i+3,dim)] = src[RIDX(i+3,j+2,dim)];
            dst[RIDX(dim-1-j-3,i,dim)] = src[RIDX(i,j+3,dim)];
            dst[RIDX(dim-1-j-3,i+1,dim)] = src[RIDX(i+1,j+3,dim)];
            dst[RIDX(dim-1-j-3,i+2,dim)] = src[RIDX(i+2,j+3,dim)];
            dst[RIDX(dim-1-j-3,i+3,dim)] = src[RIDX(i+3,j+3,dim)];
        }
Topic archived. No new replies allowed.