C: Divide by 9 optimizing?

Dividing by 8 is easy: just shift right by 3 bits. How can I do the same for division by 9? I'm using these calculations to convert horizontal pixels to columns in my VGA emulation, but the divide-and-subtract method of obtaining the character and the pixel within the character is still a bit slow:

1
2
3
4
5
6
7
	// Split the horizontal pixel position into a character column (quotient)
	// and the pixel offset inside that character (remainder).
	charsize = getcharacterwidth(VGA); // character width in pixels
	character = charinner = Sequencer->tempx; // both start out as the current x position
	character /= charsize; // character column = x / width
	charsize *= character; // charsize is reused: now the x position where this character starts
	charinner -= charsize; // inner pixel = x - character start, i.e. x % width
	attributeinfo->charx = character; // store the character column
	attributeinfo->charinner_x = charinner; // store the pixel offset within the character


getcharacterwidth always returns 8 or 9.

Does anyone know how to speed this up?

Using 8 pixels per character it's easy (
1
2
charx = (tempx>>3); // character column: tempx / 8 (for non-negative tempx)
charinner_x = (tempx&7); // pixel within the character: tempx % 8 (for non-negative tempx)
), but how do I do this with 9 pixels per character?
Last edited on
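Consider leaving low-level optimisations to the compiler. As the generated assembly below shows, both clang++ and g++ already replace the division by 9 with a multiplication by a constant followed by a shift, so no division instruction is emitted.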

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
// Signed division by the constant 8; the quotient truncates toward zero.
int divide_by_8( int a )
{
    constexpr int divisor = 8 ;
    return a / divisor ;
}

// Unsigned division by the constant 8.
unsigned int divide_by_8u( unsigned int a )
{
    constexpr unsigned int divisor = 8u ;
    const unsigned int quotient = a / divisor ;
    return quotient ;
}

// Signed division by the constant 9; the quotient truncates toward zero.
int divide_by_9( int a )
{
    constexpr int divisor = 9 ;
    return a / divisor ;
}

// Unsigned division by the constant 9.
unsigned int divide_by_9u( unsigned int a )
{
    constexpr unsigned int divisor = 9u ;
    const unsigned int quotient = a / divisor ;
    return quotient ;
}

# Compile main.cpp to assembly only (-c -S) with clang++ and then g++, both at
# -O3 -march=native, and print the generated main.s after each compiler runs.
echo -e '------- clang++ -------\n' && clang++ -std=c++11 -O3 -march=native -Wall -Wextra -pedantic-errors -fomit-frame-pointer -c -S main.cpp && cat main.s
echo -e '\n-----  g++ ------\n' && g++ -std=c++11 -O3 -march=native -Wall -Wextra -pedantic-errors -fomit-frame-pointer -c -S main.cpp && cat main.s
------- clang++ -------

	.text
	.file	"main.cpp"
	.globl	_Z11divide_by_8i
	.align	16, 0x90
	.type	_Z11divide_by_8i,@function
# int divide_by_8(int a)  -- signed a / 8, truncating toward zero.
# In:  %edi = a        Out: %eax = quotient
# An arithmetic shift alone rounds negative values toward -infinity, so a
# bias of 7 is added to negative inputs before the shift.
_Z11divide_by_8i:                       # @_Z11divide_by_8i
	.cfi_startproc
# BB#0:
	movl	%edi, %eax
	sarl	$31, %eax               # eax = 0 if a >= 0, all ones if a < 0
	shrl	$29, %eax               # keep only the low 3 bits: bias = 0 or 7
	addl	%edi, %eax              # eax = a + bias
	sarl	$3, %eax                # arithmetic shift right by 3 = divide by 8
	retq
.Ltmp0:
	.size	_Z11divide_by_8i, .Ltmp0-_Z11divide_by_8i
	.cfi_endproc

	.globl	_Z12divide_by_8uj
	.align	16, 0x90
	.type	_Z12divide_by_8uj,@function
# unsigned divide_by_8u(unsigned a)  -- unsigned a / 8 as a single logical shift.
# In:  %edi = a        Out: %eax = quotient
_Z12divide_by_8uj:                      # @_Z12divide_by_8uj
	.cfi_startproc
# BB#0:
	shrl	$3, %edi                # logical shift right by 3 = a / 8
	movl	%edi, %eax              # move the quotient into the return register
	retq
.Ltmp1:
	.size	_Z12divide_by_8uj, .Ltmp1-_Z12divide_by_8uj
	.cfi_endproc

	.globl	_Z11divide_by_9i
	.align	16, 0x90
	.type	_Z11divide_by_9i,@function
# int divide_by_9(int a)  -- signed a / 9 without a divide instruction.
# In:  %edi = a        Out: %eax = quotient
# The constant 954437177 (0x38E38E39) equals ceil(2^33 / 9), so the quotient
# is obtained as (a * constant) >> 33, plus 1 when the product is negative
# so that the result truncates toward zero.
_Z11divide_by_9i:                       # @_Z11divide_by_9i
	.cfi_startproc
# BB#0:
	movslq	%edi, %rax              # sign-extend a to 64 bits
	imulq	 $954437177, %rax       # imm = 0x38E38E39; rax = a * ceil(2^33 / 9)
	movq	%rax, %rcx
	shrq	$63, %rcx               # rcx = sign bit of the product (1 if negative)
	sarq	$33, %rax               # rax = product >> 33 (rounds toward -infinity)
	addl	%ecx, %eax              # add 1 for negative products: truncate toward zero
                                        # kill: EAX<def> EAX<kill> RAX<kill>
	retq
.Ltmp2:
	.size	_Z11divide_by_9i, .Ltmp2-_Z11divide_by_9i
	.cfi_endproc

	.globl	_Z12divide_by_9uj
	.align	16, 0x90
	.type	_Z12divide_by_9uj,@function
# unsigned divide_by_9u(unsigned a)  -- unsigned a / 9 without a divide instruction.
# In:  %edi = a        Out: %eax = quotient
# The constant 954437177 (0x38E38E39) equals ceil(2^33 / 9); the quotient is
# the 64-bit product a * constant shifted right by 33.
_Z12divide_by_9uj:                      # @_Z12divide_by_9uj
	.cfi_startproc
# BB#0:
	movl	%edi, %eax              # 32-bit move zero-extends a into rax
	imulq	 $954437177, %rax       # imm = 0x38E38E39; rax = a * ceil(2^33 / 9)
	shrq	$33, %rax               # rax = product >> 33 = a / 9
                                        # kill: EAX<def> EAX<kill> RAX<kill>
	retq
.Ltmp3:
	.size	_Z12divide_by_9uj, .Ltmp3-_Z12divide_by_9uj
	.cfi_endproc


	.ident	"clang version 3.5.0 (tags/RELEASE_350/final 217394)"
	.section	".note.GNU-stack","",@progbits

-----  g++ ------

	.file	"main.cpp"
	.section	.text.unlikely,"ax",@progbits
.LCOLDB0:
	.text
.LHOTB0:
	.p2align 5,,31
	.globl	_Z11divide_by_8i
	.type	_Z11divide_by_8i, @function
# int divide_by_8(int a)  -- signed a / 8, truncating toward zero.
# In:  %edi = a        Out: %eax = quotient
# Negative inputs are biased by 7 before the arithmetic shift; non-negative
# inputs are shifted unchanged. The choice is made with a conditional move.
_Z11divide_by_8i:
.LFB0:
	.cfi_startproc
	testl	%edi, %edi              # set the sign flag from a
	leal	7(%rdi), %eax           # eax = a + 7 (lea does not modify flags)
	cmovns	%edi, %eax              # if a >= 0, use a itself instead of a + 7
	sarl	$3, %eax                # arithmetic shift right by 3 = divide by 8
	ret
	.cfi_endproc
.LFE0:
	.size	_Z11divide_by_8i, .-_Z11divide_by_8i
	.section	.text.unlikely
.LCOLDE0:
	.text
.LHOTE0:
	.section	.text.unlikely
.LCOLDB1:
	.text
.LHOTB1:
	.p2align 5,,31
	.globl	_Z12divide_by_8uj
	.type	_Z12divide_by_8uj, @function
# unsigned divide_by_8u(unsigned a)  -- unsigned a / 8 as a single logical shift.
# In:  %edi = a        Out: %eax = quotient
_Z12divide_by_8uj:
.LFB1:
	.cfi_startproc
	movl	%edi, %eax              # copy a into the return register
	shrl	$3, %eax                # logical shift right by 3 = a / 8
	ret
	.cfi_endproc
.LFE1:
	.size	_Z12divide_by_8uj, .-_Z12divide_by_8uj
	.section	.text.unlikely
.LCOLDE1:
	.text
.LHOTE1:
	.section	.text.unlikely
.LCOLDB2:
	.text
.LHOTB2:
	.p2align 5,,31
	.globl	_Z11divide_by_9i
	.type	_Z11divide_by_9i, @function
# int divide_by_9(int a)  -- signed a / 9 without a divide instruction.
# In:  %edi = a        Out: %eax = quotient
# The constant 954437177 (0x38E38E39) equals ceil(2^33 / 9). The high half of
# the 64-bit product, shifted right once more, is the product >> 33; 1 is then
# added for negative a so that the result truncates toward zero.
_Z11divide_by_9i:
.LFB2:
	.cfi_startproc
	movl	%edi, %eax              # eax = a, the implicit operand of one-operand imull
	movl	$954437177, %edx        # edx = ceil(2^33 / 9)
	sarl	$31, %edi               # edi = -1 if a < 0, else 0
	imull	%edx                    # edx:eax = a * constant (signed 64-bit product)
	sarl	%edx                    # edx = high half >> 1, i.e. product >> 33
	movl	%edx, %eax
	subl	%edi, %eax              # subtracting -1 adds 1 for negative a
	ret
	.cfi_endproc
.LFE2:
	.size	_Z11divide_by_9i, .-_Z11divide_by_9i
	.section	.text.unlikely
.LCOLDE2:
	.text
.LHOTE2:
	.section	.text.unlikely
.LCOLDB3:
	.text
.LHOTB3:
	.p2align 5,,31
	.globl	_Z12divide_by_9uj
	.type	_Z12divide_by_9uj, @function
# unsigned divide_by_9u(unsigned a)  -- unsigned a / 9 without a divide instruction.
# In:  %edi = a        Out: %eax = quotient
# The constant 954437177 (0x38E38E39) equals ceil(2^33 / 9); the quotient is
# the 64-bit product a * constant shifted right by 33.
_Z12divide_by_9uj:
.LFB3:
	.cfi_startproc
	movl	%edi, %eax              # eax = a, the implicit operand of mull
	movl	$954437177, %edx        # edx = ceil(2^33 / 9)
	mull	%edx                    # edx:eax = a * constant (unsigned 64-bit product)
	movl	%edx, %eax              # take the high 32 bits: product >> 32
	shrl	%eax                    # one more bit: product >> 33 = a / 9
	ret
	.cfi_endproc
.LFE3:
	.size	_Z12divide_by_9uj, .-_Z12divide_by_9uj
	.section	.text.unlikely
.LCOLDE3:
	.text
.LHOTE3:
	.ident	"GCC: (GNU) 4.9.2"
	.section	.note.GNU-stack,"",@progbits

http://coliru.stacked-crooked.com/a/ac8a03fe0c23a88e
Topic archived. No new replies allowed.