mirror of
https://github.com/adtools/clib2.git
synced 2025-12-08 14:59:05 +00:00
- Added a directory to hold contributed code which has not been integrated
with the library yet. - The byteswap code was contributed by Peter Bengtsson. Thank you very much! git-svn-id: file:///Users/olsen/Code/migration-svn-zu-git/logical-line-staging/clib2/trunk@15163 87f5fb63-7c3d-0410-a384-fd976d0f7a62
This commit is contained in:
3
library/contrib/README
Normal file
3
library/contrib/README
Normal file
@ -0,0 +1,3 @@
|
||||
This directory contains contributions which have not yet been integrated
|
||||
with the clib2 library build but which should be in the CVS repository
|
||||
both for safekeeping and for you to look at and adapt.
|
||||
183
library/contrib/byteswap/byteswap.h
Normal file
183
library/contrib/byteswap/byteswap.h
Normal file
@ -0,0 +1,183 @@
|
||||
|
||||
#ifndef __BYTESWAP_H
|
||||
#define __BYTESWAP_H
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(__GNUC__)
|
||||
#define __CONST_FUNC __attribute__((const))
|
||||
#else
|
||||
#define __CONST_FUNC /* Nothing */
|
||||
#endif
|
||||
|
||||
/* Single value byteswap functions. */
|
||||
|
||||
extern __CONST_FUNC uint16_t bswap16(uint16_t);
|
||||
extern __CONST_FUNC uint32_t bswap24(uint32_t);
|
||||
extern __CONST_FUNC uint32_t bswap32(uint32_t);
|
||||
|
||||
#ifdef INT64_MIN
|
||||
extern __CONST_FUNC uint64_t bswap64(uint64_t);
|
||||
#endif
|
||||
|
||||
/* Block byteswap functions. The swab() function usually resides in unistd.h, so perhaps it should be moved there? */
|
||||
/* NOTE: Contrary to the standard swab(), this version returns the "to" pointer and the pointers are not restrict
|
||||
* qualified - so swapping buffer-contents in-place is supported.
|
||||
* Also, swab24(), swab32() and swab64() are non-standard functions.
|
||||
*/
|
||||
|
||||
extern void *swab(void *from,void *to,ssize_t nbytes);
|
||||
extern void *swab24(void *from,void *to,ssize_t nbytes); /* Same as swab(), but operates on 24-bit words instead. */
|
||||
extern void *swab32(void *from,void *to,ssize_t nbytes); /* Same as swab(), but operates on 32-bit words instead. */
|
||||
extern void *swab64(void *from,void *to,ssize_t nbytes); /* Same as swab(), but operates on 64-bit words instead. */
|
||||
|
||||
/* swab16() is simply swab() under a name that matches swab24/32/64.
 * It forwards all three arguments: source, destination and byte count. */
#define swab16(from,to,nbytes) swab((from),(to),(nbytes))
|
||||
|
||||
/*
|
||||
* Optimized inline-versions for the single-value functions follow.
|
||||
* Only GCC+PPC and GCC+m68k support for now.
|
||||
*/
|
||||
|
||||
#if defined(__GNUC__)
|
||||
|
||||
/* Select implementation. */
|
||||
|
||||
/*
 * bswapNN(x) uses the plain C helper when x is a compile-time constant and
 * the inline-assembler helper otherwise. The whole expansion is wrapped in
 * parentheses so that the macro can be used inside a larger expression
 * (for example "bswap16(x) + 1") without the conditional operator
 * swallowing the rest of that expression.
 */
#define bswap16(x) (__builtin_constant_p(x) ? __const_swap16(x) : __swap16(x))
#define bswap24(x) (__builtin_constant_p(x) ? __const_swap24(x) : __swap24(x))
#define bswap32(x) (__builtin_constant_p(x) ? __const_swap32(x) : __swap32(x))
#define bswap64(x) (__builtin_constant_p(x) ? __const_swap64(x) : __swap64(x))
|
||||
|
||||
/* Assembler implementations */
|
||||
|
||||
#if defined(__PPC__)
|
||||
|
||||
/* PowerPC inline-assembler helper selected by bswap16() for arguments that
 * are not compile-time constants. The swapped value is assembled in a
 * separate early-clobber register ("=&r") by one rotate-and-mask followed
 * by one rotate-and-insert, then returned as a uint16_t. */
static __inline__ __CONST_FUNC uint16_t __swap16(uint16_t u16) {
|
||||
uint_fast16_t result;
|
||||
__asm__("\
|
||||
rlwinm %[result],%[u16],8,16,24\n\
|
||||
rlwimi %[result],%[u16],24,24,31\n\
|
||||
":[result]"=&r"(result):[u16]"r"(u16));
|
||||
return(result);
|
||||
}
|
||||
|
||||
/*
 * PowerPC inline-assembler helper selected by bswap24() for arguments that
 * are not compile-time constants: reverses the three low-order bytes of
 * u32 and returns them with the top byte cleared, like __const_swap24().
 *
 * Step 1 rotates by 16 bits and clears the top byte, which puts the outer
 * two bytes in their final places. Step 2 copies the middle byte straight
 * from the source. Its mask must end at bit 23: a mask of 16..24 would also
 * copy bit 24 and overwrite the top bit of the low result byte.
 */
static __inline__ __CONST_FUNC uint32_t __swap24(uint32_t u32) {
	uint_fast32_t result;
	__asm__("\
	rlwinm	%[result],%[u32],16,8,31\n\
	rlwimi	%[result],%[u32],0,16,23\n\
	":[result]"=&r"(result):[u32]"r"(u32));
	return(result);
}
|
||||
|
||||
/* PowerPC inline-assembler helper selected by bswap32() for arguments that
 * are not compile-time constants. One rotate-and-mask plus two
 * rotate-and-insert instructions build the reversed word in a separate
 * early-clobber register ("=&r"), which is then returned. */
static __inline__ __CONST_FUNC uint32_t __swap32(uint32_t u32) {
|
||||
uint_fast32_t result;
|
||||
__asm__("\
|
||||
rlwinm %[result],%[u32],8,8,31\n\
|
||||
rlwimi %[result],%[u32],24,0,7\n\
|
||||
rlwimi %[result],%[u32],24,16,23\n\
|
||||
":[result]"=&r"(result):[u32]"r"(u32));
|
||||
return(result);
|
||||
}
|
||||
|
||||
/*
|
||||
* Note: __swap64() might perhaps be optimized a bit more by scheduling the
|
||||
* instructions to alternate register-use, but this instead means there
|
||||
* are two less registers free since "u64" and "result" may no longer overlap.
|
||||
* Decisions, decisions....
|
||||
*/
|
||||
|
||||
static __inline__ __CONST_FUNC uint64_t __swap64(uint64_t u64) {
|
||||
uint_fast64_t result;
|
||||
uint_fast32_t tmp;
|
||||
__asm__("\
|
||||
rlwinm %[tmp],%[u64],8,8,31\n\
|
||||
rlwimi %[tmp],%[u64],24,0,7\n\
|
||||
rlwimi %[tmp],%[u64],24,16,23\n\
|
||||
rlwinm %[result],%L[u64],8,8,31\n\
|
||||
rlwimi %[result],%L[u64],24,0,7\n\
|
||||
rlwimi %[result],%L[u64],24,16,23\n\
|
||||
or %L[result],%[tmp],%[tmp]\n\
|
||||
":[result]"=r"(result),[tmp]"=&r"(tmp):[u64]"r"(u64));
|
||||
return(result);
|
||||
}
|
||||
|
||||
#elif defined(__mc68020__)
|
||||
|
||||
/* m68k (68020 and up, per the enclosing #elif) helper selected by bswap16()
 * for non-constant arguments: a single word rotate by 8 bits, performed in
 * place on the data register holding u16 ("+d"); condition codes are
 * declared as clobbered. */
static __inline__ __CONST_FUNC uint16_t __swap16(uint16_t u16) {
|
||||
__asm__("\
|
||||
rol.w #8,%[u16]\n\
|
||||
":[u16]"+d"(u16)::"cc");
|
||||
return(u16);
|
||||
}
|
||||
|
||||
/* m68k helper selected by bswap24() for non-constant arguments. It works in
 * place on the data register holding u32 ("+d"): the same three
 * instructions as __swap32() reverse the whole long word, then a rotate
 * right by 8 bits moves the three bytes of interest back to the low end. */
static __inline__ __CONST_FUNC uint32_t __swap24(uint32_t u32) {
|
||||
__asm__("\
|
||||
rol.w #8,%[u32]\n\
|
||||
swap %[u32]\n\
|
||||
rol.w #8,%[u32]\n\
|
||||
ror.l #8,%[u32]\n\
|
||||
":[u32]"+d"(u32)::"cc");
|
||||
return(u32);
|
||||
}
|
||||
|
||||
/* m68k helper selected by bswap32() for non-constant arguments. It works in
 * place on the data register holding u32 ("+d"): swap the bytes of the low
 * word, exchange the two words, then swap the bytes of the (new) low word. */
static __inline__ __CONST_FUNC uint32_t __swap32(uint32_t u32) {
|
||||
__asm__("\
|
||||
rol.w #8,%[u32]\n\
|
||||
swap %[u32]\n\
|
||||
rol.w #8,%[u32]\n\
|
||||
":[u32]"+d"(u32)::"cc");
|
||||
return(u32);
|
||||
}
|
||||
|
||||
/*
 * m68k helper selected by bswap64() for non-constant arguments. It works in
 * place on the data-register pair holding u64 ("+d"): each register gets
 * the same rol.w/swap/rol.w sequence as __swap32(), and the two registers
 * are exchanged at the end.
 *
 * %[u64] names the first register of the pair, %R[u64] the second one.
 * NOTE(review): 'R' is taken to be GCC's m68k operand code for "second
 * register of a register pair" - please confirm with the toolchain in use.
 * The previous text used %L and, on the exg line, the malformed "L%[u64]".
 */
static __inline__ __CONST_FUNC uint64_t __swap64(uint64_t u64) {
	__asm__("\
	rol.w	#8,%[u64]\n\
	rol.w	#8,%R[u64]\n\
	swap	%[u64]\n\
	swap	%R[u64]\n\
	rol.w	#8,%[u64]\n\
	rol.w	#8,%R[u64]\n\
	exg	%[u64],%R[u64]\n\
	":[u64]"+d"(u64)::"cc");
	return(u64);
}
|
||||
|
||||
#else
|
||||
/* Unknown or undefined architecture. Perhaps compiling with "-strict -ansi", but should not use this header then anyway. */
|
||||
/*
 * No inline-assembler helpers here: constant arguments still go to the
 * plain C helpers, everything else calls the external bswapNN() function.
 * The macro name inside its own expansion is not expanded again, so it
 * refers to the function declared earlier in this header. The whole
 * expansion is parenthesized so the macros can be used inside larger
 * expressions.
 */
#undef bswap16
#undef bswap24
#undef bswap32
#undef bswap64
#define bswap16(x) (__builtin_constant_p(x) ? __const_swap16(x) : bswap16(x))
#define bswap24(x) (__builtin_constant_p(x) ? __const_swap24(x) : bswap24(x))
#define bswap32(x) (__builtin_constant_p(x) ? __const_swap32(x) : bswap32(x))
#define bswap64(x) (__builtin_constant_p(x) ? __const_swap64(x) : bswap64(x))
|
||||
#endif
|
||||
|
||||
/* C implementations for constant values */
|
||||
|
||||
/* Plain C byte swap of a 16-bit value; the bswap16() macro picks this
 * helper for compile-time constant arguments. */
static __inline__ uint16_t __const_swap16(uint16_t u16) {
	const unsigned int upper_byte = u16 >> 8;
	const unsigned int lower_byte = u16 & 0xffu;

	return (uint16_t)((lower_byte << 8) | upper_byte);
}
|
||||
|
||||
/* Plain C byte swap of the low 24 bits of a value; the top byte of the
 * result is always zero. The bswap24() macro picks this helper for
 * compile-time constant arguments. */
static __inline__ uint32_t __const_swap24(uint32_t u32) {
	const uint32_t first = u32 & 0xff;
	const uint32_t middle = u32 & 0xff00;
	const uint32_t last = (u32 >> 16) & 0xff;

	return (first << 16) | middle | last;
}
|
||||
|
||||
/* Plain C byte swap of a 32-bit value; the bswap32() macro picks this
 * helper for compile-time constant arguments. */
static __inline__ uint32_t __const_swap32(uint32_t u32) {
	uint32_t reversed;

	reversed  = (u32 & 0xff) << 24;
	reversed |= (u32 << 8) & 0xff0000;
	reversed |= (u32 >> 8) & 0xff00;
	reversed |= (u32 >> 24) & 0xff;
	return reversed;
}
|
||||
|
||||
/* Plain C byte swap of a 64-bit value; the bswap64() macro picks this
 * helper for compile-time constant arguments. */
static __inline__ uint64_t __const_swap64(uint64_t u64) {
	return (u64 << 56)
	     | ((u64 & 0xff00ULL) << 40)
	     | ((u64 & 0xff0000ULL) << 24)
	     | ((u64 & 0xff000000ULL) << 8)
	     | ((u64 >> 8) & 0xff000000ULL)
	     | ((u64 >> 24) & 0xff0000ULL)
	     | ((u64 >> 40) & 0xff00ULL)
	     | (u64 >> 56);
}
|
||||
|
||||
#endif /* __GNUC__ */
|
||||
|
||||
|
||||
#endif /* __BYTESWAP_H */
|
||||
|
||||
/* vi:set ts=3: */
|
||||
|
||||
28
library/contrib/byteswap/byteswap_bswap16.c
Normal file
28
library/contrib/byteswap/byteswap_bswap16.c
Normal file
@ -0,0 +1,28 @@
|
||||
|
||||
#if defined(__PPC__) && defined(__GNUC__)
|
||||
|
||||
/* Stand-alone PowerPC implementation of bswap16(), emitted as a global
 * function symbol in .text. The live code works on r3 only: it inserts a
 * rotated copy of r3 into itself and shifts the result right by 8 bits.
 * The three lines starting with '#' are assembler comments holding an
 * alternative sequence that used r4 as scratch. */
asm("\
|
||||
.text\n\
|
||||
.align 2\n\
|
||||
.globl bswap16\n\
|
||||
.type bswap16, @function\n\
|
||||
bswap16:\n\
|
||||
# rlwinm %r4,%r3,8,16,24\n\
|
||||
# rlwimi %r4,%r3,24,24,31\n\
|
||||
# or %r3,%r4,%r4\n\
|
||||
rlwimi %r3,%r3,16,8,15\n\
|
||||
srwi %r3,%r3,8\n\
|
||||
blr\n\
|
||||
");
|
||||
|
||||
#else
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
/* Portable fallback for bswap16(): exchanges the two bytes of u16. */
uint16_t bswap16(uint16_t u16)
{
	uint16_t swapped;

	swapped=(uint16_t)(u16<<8);
	swapped|=(uint16_t)(u16>>8);
	return(swapped);
}
|
||||
|
||||
#endif
|
||||
|
||||
29
library/contrib/byteswap/byteswap_bswap24.c
Normal file
29
library/contrib/byteswap/byteswap_bswap24.c
Normal file
@ -0,0 +1,29 @@
|
||||
|
||||
#if defined(__PPC__) && defined(__GNUC__)
|
||||
|
||||
asm(" .text\n\
|
||||
.align 2\n\
|
||||
.globl bswap24\n\
|
||||
.type bswap24, @function\n\
|
||||
bswap32:\n\
|
||||
rlwinm %r4,%r3,16,8,31\n\
|
||||
rlwimi %r4,%r3,0,16,24\n\
|
||||
or %r3,%r4,%r4\n\
|
||||
blr\n\
|
||||
");
|
||||
|
||||
#else
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
/* Portable fallback for bswap24(): reverses the three low-order bytes of
 * u32; the top byte of the result is always zero. */
uint32_t bswap24(uint32_t u32)
{
	const uint32_t byte0=u32&0xff;
	const uint32_t byte1=(u32>>8)&0xff;
	const uint32_t byte2=(u32>>16)&0xff;

	return((byte0<<16)|(byte1<<8)|byte2);
}
|
||||
|
||||
#endif
|
||||
|
||||
31
library/contrib/byteswap/byteswap_bswap32.c
Normal file
31
library/contrib/byteswap/byteswap_bswap32.c
Normal file
@ -0,0 +1,31 @@
|
||||
|
||||
#if defined(__PPC__) && defined(__GNUC__)
|
||||
|
||||
/* Stand-alone PowerPC implementation of bswap32(), emitted as a global
 * function symbol in .text. The reversed word is built in r4 from r3 with
 * one rotate-and-mask and two rotate-and-insert instructions, then copied
 * back to r3 before returning. */
asm(" .text\n\
|
||||
.align 2\n\
|
||||
.globl bswap32\n\
|
||||
.type bswap32, @function\n\
|
||||
bswap32:\n\
|
||||
rlwinm %r4,%r3,8,8,31\n\
|
||||
rlwimi %r4,%r3,24,0,7\n\
|
||||
rlwimi %r4,%r3,24,16,23\n\
|
||||
or %r3,%r4,%r4\n\
|
||||
blr\n\
|
||||
");
|
||||
|
||||
#else
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
/* Portable fallback for bswap32(): reverses the four bytes of u32. */
uint32_t bswap32(uint32_t u32)
{
	uint32_t reversed=0;
	int n;

	/* Peel bytes off the low end and push them in at the low end of the
	 * result, so the first byte taken ends up at the top. */
	for(n=0;n<4;n++) {
		reversed=(reversed<<8)|(u32&0xff);
		u32>>=8;
	}

	return(reversed);
}
|
||||
|
||||
#endif
|
||||
|
||||
48
library/contrib/byteswap/byteswap_bswap64.c
Normal file
48
library/contrib/byteswap/byteswap_bswap64.c
Normal file
@ -0,0 +1,48 @@
|
||||
|
||||
#if defined(USE_64_BIT_INTS)
|
||||
|
||||
#if defined(__PPC__) && defined(__GNUC__)
|
||||
|
||||
/* Stand-alone PowerPC implementation of bswap64(), emitted as a global
 * function symbol in .text. It reads the value from the r3/r4 pair and
 * leaves the result in the same pair: the byte-reversed r3 is built in r5,
 * the byte-reversed r4 is written to r3, and r5 is finally copied to r4,
 * so the two words trade places as well. */
asm(" .text\n\
|
||||
.align 2\n\
|
||||
.globl bswap64\n\
|
||||
.type bswap64, @function\n\
|
||||
bswap64:\n\
|
||||
rlwinm %r5,%r3,8,8,31\n\
|
||||
rlwimi %r5,%r3,24,0,7\n\
|
||||
rlwimi %r5,%r3,24,16,23\n\
|
||||
rlwinm %r3,%r4,8,8,31\n\
|
||||
rlwimi %r3,%r4,24,0,7\n\
|
||||
rlwimi %r3,%r4,24,16,23\n\
|
||||
or %r4,%r5,%r5\n\
|
||||
blr\n\
|
||||
");
|
||||
|
||||
#else
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
/* Portable fallback for bswap64(): byte-reverses each 32-bit half of u64
 * and lets the two halves trade places, which reverses all eight bytes. */
uint64_t bswap64(uint64_t u64)
{
	const uint32_t upper=(uint32_t)(u64>>32);
	const uint32_t lower=(uint32_t)(u64&0xffffffffu);
	uint32_t upper_rev,lower_rev;

	upper_rev=(upper<<24)|((upper<<8)&0xff0000)|((upper>>8)&0xff00)|(upper>>24);
	lower_rev=(lower<<24)|((lower<<8)&0xff0000)|((lower>>8)&0xff00)|(lower>>24);

	return(((uint64_t)lower_rev<<32)|upper_rev);
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
70
library/contrib/byteswap/byteswap_swab.c
Normal file
70
library/contrib/byteswap/byteswap_swab.c
Normal file
@ -0,0 +1,70 @@
|
||||
|
||||
#if defined(__GNUC__) && defined(__PPC__)
|
||||
|
||||
/* r3=from, r4=to, r5=len/count, r6=index, r7=load/store/temp */
|
||||
|
||||
asm("\
|
||||
.text\n\
|
||||
.align 2\n\
|
||||
.globl swab\n\
|
||||
.type swab,@function\n\
|
||||
swab:\n\
|
||||
dcbt 0,%r3\n\
|
||||
srawi. %r5,%r5,1\n\
|
||||
bc 4,gt,.exit\n\
|
||||
andi. %r7,%r3,3 # Check if we start on an address evenly divisible by 4.\n\
|
||||
li %r6,0\n\
|
||||
bc 4,gt,.preploop\n\
|
||||
lhbrx %r7,%r6,%r3 # Fix alignment if needed.\n\
|
||||
sthx %r7,%r6,%r4\n\
|
||||
addi %r6,%r6,2\n\
|
||||
subi %r5,%r5,1\n\
|
||||
.preploop:\n\
|
||||
andi. %r7,%r5,1 # Check if even or odd number of 16-bit words.\n\
|
||||
srawi %r5,%r5,1 # Number of 32-bit words to half-swap.\n\
|
||||
mtctr %r5\n\
|
||||
bc 12,gt,.oddloop # Jump to loop for odd number of 16-bit words.\n\
|
||||
.loop: # Loop is 'unrolled' by reading/writing 32-bit words.\n\
|
||||
lwbrx %r7,%r6,%r3\n\
|
||||
rotlwi %r7,%r7,16\n\
|
||||
stwx %r7,%r6,%r4\n\
|
||||
addi %r6,%r6,4\n\
|
||||
bc 0,lt,.loop\n\
|
||||
.exit:\n\
|
||||
or %r3,%r4,%r4\n\
|
||||
blr\n\
|
||||
.oddloop:\n\
|
||||
lwbrx %r7,%r6,%r3\n\
|
||||
rotlwi %r7,%r7,16\n\
|
||||
stwx %r7,%r6,%r4\n\
|
||||
addi %r6,%r6,4\n\
|
||||
bc 0,lt,.oddloop\n\
|
||||
sub %r6,%r6,2\n\
|
||||
lhbrx %r7,%r6,%r3 # Fix last 16-bit word.\n\
|
||||
sthx %r7,%r6,%r4\n\
|
||||
or %r3,%r4,%r4\n\
|
||||
blr\n\
|
||||
");
|
||||
|
||||
#else
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/* Portable fallback for swab(): copies len/2 16-bit words from "from" to
 * "to" with the two bytes of each word exchanged and returns "to". An odd
 * trailing byte is ignored. Source and destination use the same index, so
 * swapping a buffer in place works. */
void *swab(void *from,void *to,ssize_t len)
{
	const uint16_t *src=from;
	uint16_t *dst=to;
	int n=0;

	while(n<(len>>1)) {
		const uint16_t word=src[n];

		dst[n]=(uint16_t)((word<<8)|(word>>8));
		n++;
	}

	return(dst);
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
91
library/contrib/byteswap/byteswap_swab24.c
Normal file
91
library/contrib/byteswap/byteswap_swab24.c
Normal file
@ -0,0 +1,91 @@
|
||||
|
||||
#if defined(__GNUC__) && defined(__PPC__)
|
||||
|
||||
/* r3=from, r4=to, r5=len/remaining, r6/r7=index & r7=temp, r8/r9/r10=read/write temp */
|
||||
|
||||
/* Stand-alone PowerPC implementation of swab24(from, to, nbytes); returns
 * "to" in r3. The byte count is divided by 3 to get the number of 24-bit
 * words. The main loop (.loop) handles four of them (three 32-bit
 * loads/stores) per iteration; the words left over (count modulo 4) are
 * then reversed byte by byte in .fixloop. */
asm("\
|
||||
.text\n\
|
||||
.align 2\n\
|
||||
.globl swab24\n\
|
||||
.type swab24,@function\n\
|
||||
swab24:\n\
|
||||
dcbt 0,%r3\n\
|
||||
li %r7,3\n\
|
||||
divwu %r5,%r5,%r7\n\
|
||||
andi. %r7,%r5,3\n\
|
||||
srawi. %r5,%r5,2\n\
|
||||
mtctr %r5\n\
|
||||
or %r5,%r7,%r7\n\
|
||||
li %r6,0\n\
|
||||
bc 4,gt,.postfix\n\
|
||||
.loop:\n\
|
||||
lwbrx %r8,%r6,%r3\n\
|
||||
addi %r7,%r6,4\n\
|
||||
lwzx %r9,%r7,%r3\n\
|
||||
addi %r7,%r6,8\n\
|
||||
lwbrx %r10,%r7,%r3\n\
|
||||
rotlwi %r8,%r8,8\n\
|
||||
or %r7,%r9,%r9\n\
|
||||
rlwimi %r9,%r8,16,8,15\n\
|
||||
rlwimi %r9,%r10,8,16,23\n\
|
||||
rlwimi %r8,%r7,16,24,31\n\
|
||||
rotrwi %r10,%r10,8\n\
|
||||
rlwimi %r10,%r7,16,0,7\n\
|
||||
stwx %r8,%r6,%r4\n\
|
||||
addi %r6,%r6,4\n\
|
||||
stwx %r9,%r6,%r4\n\
|
||||
addi %r6,%r6,4\n\
|
||||
stwx %r10,%r6,%r4\n\
|
||||
addi %r6,%r6,4\n\
|
||||
bc 0,lt,.loop\n\
|
||||
.postfix: # Fix any remaining 24-bit words (number of remaining words in r5).\n\
|
||||
or. %r5,%r5,%r5\n\
|
||||
bc 4,gt,.exit\n\
|
||||
mtctr %r5\n\
|
||||
add %r3,%r3,%r6\n\
|
||||
add %r6,%r4,%r6\n\
|
||||
subi %r3,%r3,1\n\
|
||||
.fixloop:\n\
|
||||
lbzu %r7,1(%r3)\n\
|
||||
lbzu %r8,1(%r3)\n\
|
||||
lbzu %r9,1(%r3)\n\
|
||||
stb %r7,2(%r6)\n\
|
||||
stb %r8,1(%r6)\n\
|
||||
stb %r9,0(%r6)\n\
|
||||
addi %r6,%r6,3\n\
|
||||
bc 0,lt,.fixloop\n\
|
||||
.exit:\n\
|
||||
or %r3,%r4,%r4\n\
|
||||
blr\n\
|
||||
");
|
||||
|
||||
#else
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/*
|
||||
* Ugh, this is really very, very inefficient.
|
||||
* (But simple, understandable and safe)
|
||||
*/
|
||||
|
||||
/*
 * Portable fallback for swab24(): copies every complete 3-byte group from
 * "from" to "to" with its byte order reversed and returns "to".
 *
 * Only complete groups are touched: one or two trailing bytes (len not a
 * multiple of 3) are ignored instead of being treated as a full group,
 * which would read and write past the end of both buffers. All three
 * bytes of a group are read before any is written, so swapping a buffer in
 * place works.
 */
void *swab24(void *from,void *to,ssize_t len)
{
	const uint8_t *src=from;
	uint8_t *dst=to;
	uint8_t B0,B1,B2;
	ssize_t i;

	for(i=0;i+2<len;i+=3) {
		B0=src[i];
		B1=src[i+1];
		B2=src[i+2];
		dst[i]=B2;
		dst[i+1]=B1;
		dst[i+2]=B0;
	}

	return(to);
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
112
library/contrib/byteswap/byteswap_swab32.c
Normal file
112
library/contrib/byteswap/byteswap_swab32.c
Normal file
@ -0,0 +1,112 @@
|
||||
|
||||
#if defined(__GNUC__) && defined(__PPC__)
|
||||
|
||||
/* r3=from, r4=to, r5=len, r6=index, r7=load/store temp */
|
||||
|
||||
/* Stand-alone PowerPC implementation of swab32(from, to, nbytes); returns
 * "to" in r3. The byte count is shifted right by 2 to get the number of
 * 32-bit words; if that is not positive the loop is skipped. Each
 * iteration does one byte-reversed load from "from" and one plain store to
 * "to" at the same index. */
asm("\
|
||||
.text\n\
|
||||
.align 2\n\
|
||||
.globl swab32\n\
|
||||
.type swab32,@function\n\
|
||||
swab32:\n\
|
||||
srawi. %r5,%r5,2\n\
|
||||
li %r6,0\n\
|
||||
bc 4,gt,.exit\n\
|
||||
mtctr %r5\n\
|
||||
.loop:\n\
|
||||
lwbrx %r7,%r6,%r3\n\
|
||||
stwx %r7,%r6,%r4\n\
|
||||
addi %r6,%r6,4\n\
|
||||
bc 0,lt,.loop\n\
|
||||
.exit:\n\
|
||||
or %r3,%r4,%r4\n\
|
||||
blr\n\
|
||||
");
|
||||
|
||||
/* r3=from, r4=to, r5=len/temp, r6=index, r7=load/store temp, r8=cache hint
|
||||
*
|
||||
* The unrolled, cache-hinting version appears to be about 4.5% faster, but
|
||||
* in this case I opted for the smaller implementation. swab64() appears to
|
||||
* gain more from cache-hinting - probably because of it using more registers
|
||||
* for intermediate storage.
|
||||
asm("\
|
||||
.text\n\
|
||||
.align 2\n\
|
||||
.globl swab32\n\
|
||||
.type swab32,@function\n\
|
||||
swab32:\n\
|
||||
dcbt 0,%r3\n\
|
||||
andi. %r8,%r5,31 # The number of bytes handled in '.pre'. Used for prefetch hint.\n\
|
||||
srawi %r5,%r5,2 # Convert bytes-># of 32-bit words\n\
|
||||
andi. %r7,%r5,7\n\
|
||||
li %r6,0\n\
|
||||
bc 4,gt,.preploop\n\
|
||||
mtctr %r7\n\
|
||||
.pre: # One 32-bit word at a time until we have (nLeft%8)==0 \n\
|
||||
lwbrx %r7,%r6,%r3\n\
|
||||
stwx %r7,%r6,%r4\n\
|
||||
addi %r6,%r6,4\n\
|
||||
bc 0,lt,.pre\n\
|
||||
.preploop:\n\
|
||||
srawi. %r5,%r5,3 # Divide by 8 again to get number of loops.\n\
|
||||
addi %r8,%r8,32 # Start address for next loop (from r3).\n\
|
||||
bc 4,gt,.exit\n\
|
||||
mtctr %r5\n\
|
||||
.loop: # Loop unrolled 8 times = 32 bytes = 1 cache-line (except on the 970).\n\
|
||||
dcbt %r8,%r3 # Cache hint (prefetch) for the next loop\n\
|
||||
lwbrx %r7,%r6,%r3\n\
|
||||
stwx %r7,%r6,%r4\n\
|
||||
addi %r6,%r6,4\n\
|
||||
lwbrx %r7,%r6,%r3\n\
|
||||
stwx %r7,%r6,%r4\n\
|
||||
addi %r6,%r6,4\n\
|
||||
lwbrx %r7,%r6,%r3\n\
|
||||
stwx %r7,%r6,%r4\n\
|
||||
addi %r6,%r6,4\n\
|
||||
lwbrx %r7,%r6,%r3\n\
|
||||
stwx %r7,%r6,%r4\n\
|
||||
addi %r6,%r6,4\n\
|
||||
lwbrx %r7,%r6,%r3\n\
|
||||
stwx %r7,%r6,%r4\n\
|
||||
addi %r6,%r6,4\n\
|
||||
lwbrx %r7,%r6,%r3\n\
|
||||
stwx %r7,%r6,%r4\n\
|
||||
addi %r6,%r6,4\n\
|
||||
lwbrx %r7,%r6,%r3\n\
|
||||
stwx %r7,%r6,%r4\n\
|
||||
addi %r6,%r6,4\n\
|
||||
lwbrx %r7,%r6,%r3\n\
|
||||
stwx %r7,%r6,%r4\n\
|
||||
addi %r6,%r6,4\n\
|
||||
addi %r8,%r8,32 # Update cache-hint offset\n\
|
||||
bc 0,lt,.loop\n\
|
||||
.exit:\n\
|
||||
or %r3,%r4,%r4\n\
|
||||
blr\n\
|
||||
");
|
||||
*/
|
||||
|
||||
#else
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/* Portable fallback for swab32(): copies len/4 32-bit words from "from" to
 * "to" with the byte order of each word reversed and returns "to".
 * Trailing bytes that do not form a complete word are ignored. Source and
 * destination use the same index, so swapping in place works. */
void *swab32(void *from,void *to,ssize_t len)
{
	const uint32_t *src=from;
	uint32_t *dst=to;
	int n=0;

	while(n<(len>>2)) {
		const uint32_t word=src[n];

		dst[n]=(word<<24)|
		       ((word<<8)&0xff0000)|
		       ((word>>8)&0xff00)|
		       (word>>24);
		n++;
	}

	return(to);
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
101
library/contrib/byteswap/byteswap_swab64.c
Normal file
101
library/contrib/byteswap/byteswap_swab64.c
Normal file
@ -0,0 +1,101 @@
|
||||
|
||||
#if defined(__GNUC__) && defined(__PPC__)
|
||||
|
||||
/* r3=from, r4=to, r5=len/temp, r6/r7=index, r8/r9=load/store temp, r10=cache hint */
|
||||
|
||||
/* This version is unrolled and uses cache-hinting. It appears to gain about 10%
|
||||
* over a non-unrolled, non-hinting version.
|
||||
*/
|
||||
|
||||
/* Stand-alone PowerPC implementation of swab64(from, to, nbytes); returns
 * "to" in r3. The byte count is shifted right by 3 to get the number of
 * 64-bit words. The .pre loop handles (count modulo 4) words one at a
 * time; the main loop handles four words per iteration and issues a dcbt
 * prefetch hint at a running offset kept in r10. Each 64-bit word is
 * swapped by two byte-reversed 32-bit loads whose results are stored in
 * exchanged positions. */
asm("\
|
||||
.text\n\
|
||||
.align 2\n\
|
||||
.globl swab64\n\
|
||||
.type swab64,@function\n\
|
||||
swab64:\n\
|
||||
dcbt 0,%r3\n\
|
||||
andi. %r10,%r5,31 # The number of bytes handled in '.pre'. Used for prefetch hint.\n\
|
||||
srawi %r5,%r5,3 # Convert bytes-># of 64-bit words\n\
|
||||
andi. %r7,%r5,3\n\
|
||||
li %r6,0\n\
|
||||
bc 4,gt,.preploop\n\
|
||||
mtctr %r7\n\
|
||||
.pre: # One 64-bit word at a time until we have (nLeft%4)==0 \n\
|
||||
lwbrx %r8,%r6,%r3\n\
|
||||
addi %r7,%r6,4\n\
|
||||
lwbrx %r9,%r7,%r3\n\
|
||||
stwx %r8,%r7,%r4\n\
|
||||
stwx %r9,%r6,%r4\n\
|
||||
addi %r6,%r6,8\n\
|
||||
bc 0,lt,.pre\n\
|
||||
.preploop:\n\
|
||||
srawi. %r5,%r5,2 # Divide by 4 again to get number of loops.\n\
|
||||
addi %r10,%r10,32 # Start address for next loop.\n\
|
||||
bc 4,gt,.exit\n\
|
||||
mtctr %r5\n\
|
||||
.loop: # Loop unrolled 4 times = 32 bytes = 1 cache-line (except on the 970).\n\
|
||||
dcbt %r10,%r3 # Cache hint (prefetch) for the next iteration\n\
|
||||
lwbrx %r8,%r6,%r3\n\
|
||||
addi %r7,%r6,4\n\
|
||||
lwbrx %r9,%r7,%r3\n\
|
||||
stwx %r8,%r7,%r4\n\
|
||||
stwx %r9,%r6,%r4\n\
|
||||
addi %r6,%r6,8\n\
|
||||
lwbrx %r8,%r6,%r3\n\
|
||||
addi %r7,%r6,4\n\
|
||||
lwbrx %r9,%r7,%r3\n\
|
||||
stwx %r8,%r7,%r4\n\
|
||||
stwx %r9,%r6,%r4\n\
|
||||
addi %r6,%r6,8\n\
|
||||
lwbrx %r8,%r6,%r3\n\
|
||||
addi %r7,%r6,4\n\
|
||||
lwbrx %r9,%r7,%r3\n\
|
||||
stwx %r8,%r7,%r4\n\
|
||||
stwx %r9,%r6,%r4\n\
|
||||
addi %r6,%r6,8\n\
|
||||
lwbrx %r8,%r6,%r3\n\
|
||||
addi %r7,%r6,4\n\
|
||||
lwbrx %r9,%r7,%r3\n\
|
||||
stwx %r8,%r7,%r4\n\
|
||||
stwx %r9,%r6,%r4\n\
|
||||
addi %r6,%r6,8\n\
|
||||
addi %r10,%r10,32 # Update cache-hint offset\n\
|
||||
bc 0,lt,.loop\n\
|
||||
.exit:\n\
|
||||
or %r3,%r4,%r4\n\
|
||||
blr\n\
|
||||
");
|
||||
|
||||
#else
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/* Portable fallback for swab64(): copies len/8 64-bit words from "from" to
 * "to" with the byte order of each word reversed and returns "to". Every
 * 64-bit word is treated as two 32-bit halves: each half is byte-reversed
 * and the halves are stored in exchanged positions. Both halves are read
 * before anything is written, so swapping in place works. */
void *swab64(void *from,void *to,ssize_t len)
{
	const uint32_t *src=from;
	uint32_t *dst=to;
	int n;

	for(n=0;n<(len>>3);n++) {
		const uint32_t first=src[2*n];
		const uint32_t second=src[2*n+1];

		dst[2*n]=(second<<24)|
		         ((second<<8)&0xff0000)|
		         ((second>>8)&0xff00)|
		         (second>>24);
		dst[2*n+1]=(first<<24)|
		           ((first<<8)&0xff0000)|
		           ((first>>8)&0xff00)|
		           (first>>24);
	}

	return(to);
}
|
||||
|
||||
#endif
|
||||
|
||||
/* vi:set ts=3: */
|
||||
|
||||
Reference in New Issue
Block a user