mirror of
https://github.com/adtools/clib2.git
synced 2025-12-08 14:59:05 +00:00
- Added a directory to hold contributed code which has not been integrated
with the library yet. - The byteswap code was contributed by Peter Bengtsson. Thank you very much! git-svn-id: file:///Users/olsen/Code/migration-svn-zu-git/logical-line-staging/clib2/trunk@15163 87f5fb63-7c3d-0410-a384-fd976d0f7a62
This commit is contained in:
112
library/contrib/byteswap/byteswap_swab32.c
Normal file
112
library/contrib/byteswap/byteswap_swab32.c
Normal file
@ -0,0 +1,112 @@
|
||||
|
||||
#if defined(__GNUC__) && defined(__PPC__)
|
||||
|
||||
/*
 * swab32 (PowerPC assembly version): copies len/4 32-bit words from 'from'
 * to 'to', reversing the byte order of each word, and returns 'to' in r3.
 *
 * Register use: r3=from, r4=to, r5=len (then word count), r6=byte index,
 * r7=load/store temp.
 *
 * - "srawi." turns the byte count into a word count and sets CR0; if the
 *   count is not greater than zero, "bc 4,gt" goes straight to .exit.
 * - The loop is driven by CTR: lwbrx loads a byte-reversed word from
 *   from+index, stwx stores it at to+index, and the index advances by 4.
 *   "bc 0,lt" decrements CTR and loops while it is non-zero (CR0 'lt' is
 *   clear at that point because the word count was positive).
 * - Bytes beyond the last complete word are not touched.
 */
|
||||
|
||||
asm("\
|
||||
.text\n\
|
||||
.align 2\n\
|
||||
.globl swab32\n\
|
||||
.type swab32,@function\n\
|
||||
swab32:\n\
|
||||
srawi. %r5,%r5,2\n\
|
||||
li %r6,0\n\
|
||||
bc 4,gt,.exit\n\
|
||||
mtctr %r5\n\
|
||||
.loop:\n\
|
||||
lwbrx %r7,%r6,%r3\n\
|
||||
stwx %r7,%r6,%r4\n\
|
||||
addi %r6,%r6,4\n\
|
||||
bc 0,lt,.loop\n\
|
||||
.exit:\n\
|
||||
or %r3,%r4,%r4\n\
|
||||
blr\n\
|
||||
");
|
||||
|
||||
/* r3=from, r4=to, r5=len/temp, r6=index, r7=load/store temp, r8=cache hint
*
* Alternative implementation, kept for reference only; it is disabled by
* this comment.
*
* The unrolled, cache-hinting version below appears to be about 4.5% faster,
* but in this case I opted for the smaller implementation. swab64() appears
* to gain more from cache-hinting - probably because it uses more registers
* for intermediate storage.
|
||||
asm("\
|
||||
.text\n\
|
||||
.align 2\n\
|
||||
.globl swab32\n\
|
||||
.type swab32,@function\n\
|
||||
swab32:\n\
|
||||
dcbt 0,%r3\n\
|
||||
andi. %r8,%r5,31 # The number of bytes handled in '.pre'. Used for prefetch hint.\n\
|
||||
srawi %r5,%r5,2 # Convert bytes-># of 32-bit words\n\
|
||||
andi. %r7,%r5,7\n\
|
||||
li %r6,0\n\
|
||||
bc 4,gt,.preploop\n\
|
||||
mtctr %r7\n\
|
||||
.pre: # One 32-bit word at a time until we have (nLeft%8)==0 \n\
|
||||
lwbrx %r7,%r6,%r3\n\
|
||||
stwx %r7,%r6,%r4\n\
|
||||
addi %r6,%r6,4\n\
|
||||
bc 0,lt,.pre\n\
|
||||
.preploop:\n\
|
||||
srawi. %r5,%r5,3 # Divide by 8 again to get number of loops.\n\
|
||||
addi %r8,%r8,32 # Start address for next loop (from r3).\n\
|
||||
bc 4,gt,.exit\n\
|
||||
mtctr %r5\n\
|
||||
.loop: # Loop unrolled 8 times = 32 bytes = 1 cache-line (except on the 970).\n\
|
||||
dcbt %r8,%r3 # Cache hint (prefetch) for the next loop\n\
|
||||
lwbrx %r7,%r6,%r3\n\
|
||||
stwx %r7,%r6,%r4\n\
|
||||
addi %r6,%r6,4\n\
|
||||
lwbrx %r7,%r6,%r3\n\
|
||||
stwx %r7,%r6,%r4\n\
|
||||
addi %r6,%r6,4\n\
|
||||
lwbrx %r7,%r6,%r3\n\
|
||||
stwx %r7,%r6,%r4\n\
|
||||
addi %r6,%r6,4\n\
|
||||
lwbrx %r7,%r6,%r3\n\
|
||||
stwx %r7,%r6,%r4\n\
|
||||
addi %r6,%r6,4\n\
|
||||
lwbrx %r7,%r6,%r3\n\
|
||||
stwx %r7,%r6,%r4\n\
|
||||
addi %r6,%r6,4\n\
|
||||
lwbrx %r7,%r6,%r3\n\
|
||||
stwx %r7,%r6,%r4\n\
|
||||
addi %r6,%r6,4\n\
|
||||
lwbrx %r7,%r6,%r3\n\
|
||||
stwx %r7,%r6,%r4\n\
|
||||
addi %r6,%r6,4\n\
|
||||
lwbrx %r7,%r6,%r3\n\
|
||||
stwx %r7,%r6,%r4\n\
|
||||
addi %r6,%r6,4\n\
|
||||
addi %r8,%r8,32 # Update cache-hint offset\n\
|
||||
bc 0,lt,.loop\n\
|
||||
.exit:\n\
|
||||
or %r3,%r4,%r4\n\
|
||||
blr\n\
|
||||
");
|
||||
*/
|
||||
|
||||
#else
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/*
 * swab32 - copy 32-bit words from 'from' to 'to' with their bytes reversed.
 *
 * from: source buffer, read as a sequence of 4-byte words.
 * to:   destination buffer; may be the very same buffer as 'from'.
 * len:  length in bytes. Only complete words (len / 4) are converted; up to
 *       three trailing bytes are left alone, and nothing is done if len is
 *       zero or negative.
 *
 * Returns 'to'.
 *
 * The buffers are accessed byte by byte, so they do not have to be 32-bit
 * aligned and no object is accessed through an incompatible pointer type.
 * The word counter has the same type as 'len', so it cannot overflow when
 * the buffer holds more words than an 'int' can count.
 */
void *swab32(void *from,void *to,ssize_t len)
{
	const unsigned char *src=from;
	unsigned char *dst=to;
	unsigned char b0,b1,b2,b3;
	ssize_t remaining;

	if(len<=0)
		return(to);

	for(remaining=len/4;remaining>0;remaining--) {
		/* Fetch the whole word before storing anything, so that
		 * converting a buffer in place (from==to) works. */
		b0=src[0];
		b1=src[1];
		b2=src[2];
		b3=src[3];

		dst[0]=b3;
		dst[1]=b2;
		dst[2]=b1;
		dst[3]=b0;

		src+=4;
		dst+=4;
	}

	return(to);
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
Reference in New Issue
Block a user