Calling a subroutine
-
This horror using GCC for AVR.
/*
 * apicall - enter the flash-programming API through a hand-built return
 * address.
 *
 * Pushes the bytes of the word address (FLASHEND >> 1) onto the stack, low
 * byte first, loads r22 with the function code API_PROG_PAGE, and then
 * relies on the compiler-emitted `ret` for the `return` statement to pop
 * that address and transfer control there.
 *
 * NOTE(review): this only works if the pushed bytes are on top of the stack
 * when `ret` executes (no frame, no saved registers) and if r22 is left
 * untouched between the separate asm statements; the compiler guarantees
 * neither. None of the asm statements declares r22 as clobbered.
 * The noinline attribute is presumably there so a genuine call/return frame
 * exists for the API to return through - confirm.
 */
static void apicall( void ) __attribute__ ((noinline));
static void apicall( void )
{
asm volatile("ldi r22, %0" :: "M" ((FLASHEND>>1)&0xFF)); // low byte of the target word address
asm volatile("push r22");
asm volatile("ldi r22, %0" :: "M" ((FLASHEND>>9)&0xFF)); // high byte of the target word address
asm volatile("push r22");
#if( FLASHEND > 0x1FFFF )
// Larger flash: a third address byte is pushed as well.
asm volatile("ldi r22, %0" :: "M" ((FLASHEND>>17)&0xFF)); // extended high byte
asm volatile("push r22");
#endif
asm volatile("ldi r22, %0" :: "M" (API_PROG_PAGE)); // function code handed over in r22
return; // the emitted ret pops the address pushed above: jump to API
}/************** copy one page from SRAM to Flash ************************/
unsigned char copy_flash( void *src, void *dst, unsigned char dst_hi )
{
unsigned char i;if( (unsigned int)dst & (SPM_PAGESIZE-1))
return API_ERR_PAGE; // not on page limitasm volatile("movw r26, %0" :: "r" (src));
asm volatile("movw r30, %0" :: "r" (dst));
asm volatile("mov r21, %0" :: "r" (dst_hi));
apicall();
asm volatile("clr r1"); // clear zero reg
asm volatile("mov %0, r22" : "=r" (i));
return i; // success
}Why push a address on the stack and using the compiler emitted ret instruction instead of using a call instruction? Assuming CPU registers remain unchanged between asm statements: bad Changing CPU registers in assembler without informing compiler: worse Why not write the subroutine to use the C calling convention?
-
This horror using GCC for AVR.
/*
 * apicall - enter the flash-programming API through a hand-built return
 * address.
 *
 * Pushes the bytes of the word address (FLASHEND >> 1) onto the stack, low
 * byte first, loads r22 with the function code API_PROG_PAGE, and then
 * relies on the compiler-emitted `ret` for the `return` statement to pop
 * that address and transfer control there.
 *
 * NOTE(review): this only works if the pushed bytes are on top of the stack
 * when `ret` executes (no frame, no saved registers) and if r22 is left
 * untouched between the separate asm statements; the compiler guarantees
 * neither. None of the asm statements declares r22 as clobbered.
 * The noinline attribute is presumably there so a genuine call/return frame
 * exists for the API to return through - confirm.
 */
static void apicall( void ) __attribute__ ((noinline));
static void apicall( void )
{
asm volatile("ldi r22, %0" :: "M" ((FLASHEND>>1)&0xFF)); // low byte of the target word address
asm volatile("push r22");
asm volatile("ldi r22, %0" :: "M" ((FLASHEND>>9)&0xFF)); // high byte of the target word address
asm volatile("push r22");
#if( FLASHEND > 0x1FFFF )
// Larger flash: a third address byte is pushed as well.
asm volatile("ldi r22, %0" :: "M" ((FLASHEND>>17)&0xFF)); // extended high byte
asm volatile("push r22");
#endif
asm volatile("ldi r22, %0" :: "M" (API_PROG_PAGE)); // function code handed over in r22
return; // the emitted ret pops the address pushed above: jump to API
}/************** copy one page from SRAM to Flash ************************/
unsigned char copy_flash( void *src, void *dst, unsigned char dst_hi )
{
unsigned char i;if( (unsigned int)dst & (SPM_PAGESIZE-1))
return API_ERR_PAGE; // not on page limitasm volatile("movw r26, %0" :: "r" (src));
asm volatile("movw r30, %0" :: "r" (dst));
asm volatile("mov r21, %0" :: "r" (dst_hi));
apicall();
asm volatile("clr r1"); // clear zero reg
asm volatile("mov %0, r22" : "=r" (i));
return i; // success
}Why push a address on the stack and using the compiler emitted ret instruction instead of using a call instruction? Assuming CPU registers remain unchanged between asm statements: bad Changing CPU registers in assembler without informing compiler: worse Why not write the subroutine to use the C calling convention?
-
This horror using GCC for AVR.
/*
 * apicall - enter the flash-programming API through a hand-built return
 * address.
 *
 * Pushes the bytes of the word address (FLASHEND >> 1) onto the stack, low
 * byte first, loads r22 with the function code API_PROG_PAGE, and then
 * relies on the compiler-emitted `ret` for the `return` statement to pop
 * that address and transfer control there.
 *
 * NOTE(review): this only works if the pushed bytes are on top of the stack
 * when `ret` executes (no frame, no saved registers) and if r22 is left
 * untouched between the separate asm statements; the compiler guarantees
 * neither. None of the asm statements declares r22 as clobbered.
 * The noinline attribute is presumably there so a genuine call/return frame
 * exists for the API to return through - confirm.
 */
static void apicall( void ) __attribute__ ((noinline));
static void apicall( void )
{
asm volatile("ldi r22, %0" :: "M" ((FLASHEND>>1)&0xFF)); // low byte of the target word address
asm volatile("push r22");
asm volatile("ldi r22, %0" :: "M" ((FLASHEND>>9)&0xFF)); // high byte of the target word address
asm volatile("push r22");
#if( FLASHEND > 0x1FFFF )
// Larger flash: a third address byte is pushed as well.
asm volatile("ldi r22, %0" :: "M" ((FLASHEND>>17)&0xFF)); // extended high byte
asm volatile("push r22");
#endif
asm volatile("ldi r22, %0" :: "M" (API_PROG_PAGE)); // function code handed over in r22
return; // the emitted ret pops the address pushed above: jump to API
}/************** copy one page from SRAM to Flash ************************/
unsigned char copy_flash( void *src, void *dst, unsigned char dst_hi )
{
unsigned char i;if( (unsigned int)dst & (SPM_PAGESIZE-1))
return API_ERR_PAGE; // not on page limitasm volatile("movw r26, %0" :: "r" (src));
asm volatile("movw r30, %0" :: "r" (dst));
asm volatile("mov r21, %0" :: "r" (dst_hi));
apicall();
asm volatile("clr r1"); // clear zero reg
asm volatile("mov %0, r22" : "=r" (i));
return i; // success
}Why push a address on the stack and using the compiler emitted ret instruction instead of using a call instruction? Assuming CPU registers remain unchanged between asm statements: bad Changing CPU registers in assembler without informing compiler: worse Why not write the subroutine to use the C calling convention?
Looks like an attempt to do a tail-call optimisation, so that the subroutine call returns directly to our caller (i.e. copy_flash) rather than to apicall() itself. It's probably an attempt to save a little code space. This can backfire though: modern processors do a lot of branch prediction, and unbalancing the call/return stack will screw up the branch predictor and make your code run slower. I know Raymond Chen wrote about this but can't find the link. I would go for a direct jump to the API entry point. Perhaps the processor doesn't have a readily-accessible jump instruction? I'd be astonished if it didn't, though. As for changing CPU registers, the Application Binary Interface (ABI) for the system will state which registers are volatile - can be changed by a function without saving it - and which are non-volatile - must be saved by the function before being used. A function must save all the volatile registers it's used before making a call, because the called function might trash them. I agree with you on the weird manual call to apicall() though.
DoEvents: Generating unexpected recursion since 1991
-
This horror using GCC for AVR.
/*
 * apicall - enter the flash-programming API through a hand-built return
 * address.
 *
 * Pushes the bytes of the word address (FLASHEND >> 1) onto the stack, low
 * byte first, loads r22 with the function code API_PROG_PAGE, and then
 * relies on the compiler-emitted `ret` for the `return` statement to pop
 * that address and transfer control there.
 *
 * NOTE(review): this only works if the pushed bytes are on top of the stack
 * when `ret` executes (no frame, no saved registers) and if r22 is left
 * untouched between the separate asm statements; the compiler guarantees
 * neither. None of the asm statements declares r22 as clobbered.
 * The noinline attribute is presumably there so a genuine call/return frame
 * exists for the API to return through - confirm.
 */
static void apicall( void ) __attribute__ ((noinline));
static void apicall( void )
{
asm volatile("ldi r22, %0" :: "M" ((FLASHEND>>1)&0xFF)); // low byte of the target word address
asm volatile("push r22");
asm volatile("ldi r22, %0" :: "M" ((FLASHEND>>9)&0xFF)); // high byte of the target word address
asm volatile("push r22");
#if( FLASHEND > 0x1FFFF )
// Larger flash: a third address byte is pushed as well.
asm volatile("ldi r22, %0" :: "M" ((FLASHEND>>17)&0xFF)); // extended high byte
asm volatile("push r22");
#endif
asm volatile("ldi r22, %0" :: "M" (API_PROG_PAGE)); // function code handed over in r22
return; // the emitted ret pops the address pushed above: jump to API
}/************** copy one page from SRAM to Flash ************************/
unsigned char copy_flash( void *src, void *dst, unsigned char dst_hi )
{
unsigned char i;if( (unsigned int)dst & (SPM_PAGESIZE-1))
return API_ERR_PAGE; // not on page limitasm volatile("movw r26, %0" :: "r" (src));
asm volatile("movw r30, %0" :: "r" (dst));
asm volatile("mov r21, %0" :: "r" (dst_hi));
apicall();
asm volatile("clr r1"); // clear zero reg
asm volatile("mov %0, r22" : "=r" (i));
return i; // success
}Why push a address on the stack and using the compiler emitted ret instruction instead of using a call instruction? Assuming CPU registers remain unchanged between asm statements: bad Changing CPU registers in assembler without informing compiler: worse Why not write the subroutine to use the C calling convention?
Timothy Baldwin wrote:
Why not write the subroutine to use the C calling convention?
Some microcontrollers have unchangeable routines in ROM to perform various functions like writing to flash memory. Some of them require that such routines be used when writing to flash because (1) none of the RAM in the system supports code execution, and (2) while a write to flash is taking place, none of the flash memory can be used for any purpose including program execution. The register and calling conventions for such routines are what they are, and there is no way a programmer or compiler author can change them. Sometimes microcontrollers require some really nasty coding tricks to make things work in practical fashion. When I use such tricks I make sure to thoroughly document what I'm doing, and I avoid using such tricks purely for the sake of "looking impressive". On the other hand, if an interrupt routine is going to be executing 10,000 times per second on a micro which runs 1,000,000 instructions per second, having clear readable code which takes 30 cycles longer than necessary is not as good as having code which is tricky and hard to read, but runs 50 cycles faster.
-
Looks like an attempt to do a tail-call optimisation, so that the subroutine call returns directly to our caller (i.e. copy_flash) rather than to apicall() itself. It's probably an attempt to save a little code space. This can backfire though: modern processors do a lot of branch prediction, and unbalancing the call/return stack will screw up the branch predictor and make your code run slower. I know Raymond Chen wrote about this but can't find the link. I would go for a direct jump to the API entry point. Perhaps the processor doesn't have a readily-accessible jump instruction? I'd be astonished if it didn't, though. As for changing CPU registers, the Application Binary Interface (ABI) for the system will state which registers are volatile - can be changed by a function without saving it - and which are non-volatile - must be saved by the function before being used. A function must save all the volatile registers it's used before making a call, because the called function might trash them. I agree with you on the weird manual call to apicall() though.
DoEvents: Generating unexpected recursion since 1991
This processor does have a jump instruction, but putting everything in copy_flash would be better. And the assumption that the return address is at the top of the stack breaks when optimisation is not enabled. What registers can be changed by a function is irrelevant here. The question is what inline assembler is allowed to do: whilst Visual C++ will parse inline assembler to determine register usage, GCC will not. If one of the registers has been allocated for another purpose by the compiler, it will break. Neither Visual C++ nor GCC guarantees that registers will remain unchanged between consecutive asm statements. If profiling is enabled, this code will break.