1
0
mirror of https://github.com/adtools/clib2.git synced 2025-12-08 14:59:05 +00:00

- Added a directory to hold contributed code which has not been integrated

with the library yet.

- The byteswap code was contributed by Peter Bengtsson. Thank you very much!


git-svn-id: file:///Users/olsen/Code/migration-svn-zu-git/logical-line-staging/clib2/trunk@15163 87f5fb63-7c3d-0410-a384-fd976d0f7a62
This commit is contained in:
Olaf Barthel
2006-11-13 09:49:49 +00:00
parent 7e1d5d6f6a
commit 66303e9ba2
10 changed files with 696 additions and 0 deletions

3
library/contrib/README Normal file
View File

@ -0,0 +1,3 @@
This directory contains contributions which have not yet been integrated
with the clib2 library build but which should be in the CVS repository
both for safekeeping and for you to look at and adapt.

View File

@ -0,0 +1,183 @@
#ifndef __BYTESWAP_H
#define __BYTESWAP_H
#include <sys/types.h>
#include <stdint.h>
/*
 * __CONST_FUNC is placed on the byteswap prototypes and inline helpers in
 * this header. With GCC it expands to the "const" function attribute (the
 * result depends only on the arguments); with any other compiler it
 * expands to nothing.
 */
#if defined(__GNUC__)
#define __CONST_FUNC __attribute__((const))
#else
#define __CONST_FUNC /* Nothing */
#endif
/*
 * Single value byteswap functions.
 * Each one takes a single unsigned value and returns the byte-swapped value.
 * bswap24() is passed and returned in a 32-bit type.
 * bswap64() is only declared when <stdint.h> defines INT64_MIN.
 */
extern __CONST_FUNC uint16_t bswap16(uint16_t);
extern __CONST_FUNC uint32_t bswap24(uint32_t);
extern __CONST_FUNC uint32_t bswap32(uint32_t);
#ifdef INT64_MIN
extern __CONST_FUNC uint64_t bswap64(uint64_t);
#endif
/* Block byteswap functions. The swab() function usually resides in unistd.h, so perhaps it should be moved there? */
/* NOTE: Contrary to the standard swab(), this version returns the "to" pointer and the pointers are not restrict
 * qualified - so swapping buffer-contents in-place is supported.
 * Also, swab24(), swab32() and swab64() are non-standard functions.
 *
 * Parameters (all four functions):
 *   from   - source buffer
 *   to     - destination buffer (may be the same as "from")
 *   nbytes - length of the buffers in bytes
 * Returns: the "to" pointer.
 */
extern void *swab(void *from,void *to,ssize_t nbytes);
extern void *swab24(void *from,void *to,ssize_t nbytes); /* Same as swab(), but operates on 24-bit words instead. */
extern void *swab32(void *from,void *to,ssize_t nbytes); /* Same as swab(), but operates on 32-bit words instead. */
extern void *swab64(void *from,void *to,ssize_t nbytes); /* Same as swab(), but operates on 64-bit words instead. */
/* swab16() is an alias for swab(); it must forward all three arguments. */
#define swab16(from,to,nbytes) swab((from),(to),(nbytes))
/*
* Optimized inline-versions for the single-value functions follow.
* Only GCC+PPC and GCC+m68k are supported for now.
*/
#if defined(__GNUC__)
/*
 * Select implementation.
 * Arguments the compiler recognizes as constants go to the plain C helpers
 * (__const_swapNN), everything else goes to the inline assembler helpers
 * (__swapNN). __builtin_constant_p() does not evaluate its argument, so (x)
 * is evaluated only once.
 * The whole expansion is parenthesized so that the conditional operator
 * cannot capture neighbouring operators, e.g. in "bswap16(v) - 1" or
 * "(char)bswap16(v)".
 */
#define bswap16(x) (__builtin_constant_p(x)?__const_swap16(x):__swap16(x))
#define bswap24(x) (__builtin_constant_p(x)?__const_swap24(x):__swap24(x))
#define bswap32(x) (__builtin_constant_p(x)?__const_swap32(x):__swap32(x))
#define bswap64(x) (__builtin_constant_p(x)?__const_swap64(x):__swap64(x))
/* Assembler implementations */
#if defined(__PPC__)
/*
 * Swap the two bytes of a 16-bit value (PowerPC inline assembler).
 * The rlwinm rotates u16 left by 8 and keeps mask bits 16..24; the rlwimi
 * then inserts bits 24..31 of u16 rotated left by 24, which overwrites
 * bit 24 of the first step again.
 * result is an early-clobber output ("=&r"), so it never shares a register
 * with the input operand.
 */
static __inline__ __CONST_FUNC uint16_t __swap16(uint16_t u16) {
uint_fast16_t result;
__asm__("\
rlwinm %[result],%[u16],8,16,24\n\
rlwimi %[result],%[u16],24,24,31\n\
":[result]"=&r"(result):[u16]"r"(u16));
return(result);
}
/*
 * Reverse the three low-order bytes of a 32-bit value (PowerPC inline
 * assembler); the top byte of the result is cleared.
 *
 * rlwinm rotates u32 left by 16 and keeps bits 8..31, which puts the old
 * low byte into bits 8..15 and the old bits 8..15 into the low byte.
 * rlwimi then copies the middle byte (bits 16..23) over unchanged. The
 * mask must end at bit 23: the MB/ME range is inclusive, and a mask of
 * 16..24 would also copy the top bit of the source's low byte into the
 * top bit of the result's low byte.
 *
 * result is an early-clobber output ("=&r"), so it never shares a register
 * with the input operand.
 */
static __inline__ __CONST_FUNC uint32_t __swap24(uint32_t u32) {
uint_fast32_t result;
__asm__("\
rlwinm %[result],%[u32],16,8,31\n\
rlwimi %[result],%[u32],0,16,23\n\
":[result]"=&r"(result):[u32]"r"(u32));
return(result);
}
/*
 * Reverse the four bytes of a 32-bit value (PowerPC inline assembler).
 * rlwinm rotates u32 left by 8 and keeps bits 8..31; the two rlwimi
 * instructions then insert bits 0..7 and bits 16..23 of u32 rotated left
 * by 24.
 * result is an early-clobber output ("=&r"), so it never shares a register
 * with the input operand.
 */
static __inline__ __CONST_FUNC uint32_t __swap32(uint32_t u32) {
uint_fast32_t result;
__asm__("\
rlwinm %[result],%[u32],8,8,31\n\
rlwimi %[result],%[u32],24,0,7\n\
rlwimi %[result],%[u32],24,16,23\n\
":[result]"=&r"(result):[u32]"r"(u32));
return(result);
}
/*
* Note: __swap64() might perhaps be optimized a bit more by scheduling the
* instructions to alternate register-use, but this instead means there
* are two less registers free since "u64" and "result" may no longer overlap.
* Decisions, decisions....
*/
/*
 * Reverse the eight bytes of a 64-bit value (PowerPC inline assembler).
 * The same three-instruction sequence as in __swap32() is applied to
 * %[u64] with tmp as the target and to %L[u64] with %[result] as the
 * target; tmp is finally copied into %L[result], so the two byte-reversed
 * halves end up exchanged.
 * NOTE(review): %[x] and %L[x] presumably name the two registers that hold
 * the 64-bit operand - confirm against the GCC PowerPC operand modifiers.
 * Only tmp is an early-clobber output ("=&r"); result is a plain "=r"
 * output.
 */
static __inline__ __CONST_FUNC uint64_t __swap64(uint64_t u64) {
uint_fast64_t result;
uint_fast32_t tmp;
__asm__("\
rlwinm %[tmp],%[u64],8,8,31\n\
rlwimi %[tmp],%[u64],24,0,7\n\
rlwimi %[tmp],%[u64],24,16,23\n\
rlwinm %[result],%L[u64],8,8,31\n\
rlwimi %[result],%L[u64],24,0,7\n\
rlwimi %[result],%L[u64],24,16,23\n\
or %L[result],%[tmp],%[tmp]\n\
":[result]"=r"(result),[tmp]"=&r"(tmp):[u64]"r"(u64));
return(result);
}
#elif defined(__mc68020__)
/*
 * Swap the two bytes of a 16-bit value (m68k inline assembler).
 * A single "rol.w #8" rotates the low 16 bits by one byte. u16 is a
 * read/write data-register operand ("+d") and the condition codes are
 * listed as clobbered.
 */
static __inline__ __CONST_FUNC uint16_t __swap16(uint16_t u16) {
__asm__("\
rol.w #8,%[u16]\n\
":[u16]"+d"(u16)::"cc");
return(u16);
}
/*
 * Reverse the three low-order bytes of a 32-bit value (m68k inline
 * assembler).
 * The first three instructions are the same sequence __swap32() uses
 * (rol.w #8, swap, rol.w #8); the trailing "ror.l #8" then rotates the
 * whole register right by one byte.
 * NOTE(review): the rotate appears to put the original bits 24..31 back
 * into bits 24..31 instead of clearing them as __const_swap24() does -
 * confirm whether that difference is intended.
 */
static __inline__ __CONST_FUNC uint32_t __swap24(uint32_t u32) {
__asm__("\
rol.w #8,%[u32]\n\
swap %[u32]\n\
rol.w #8,%[u32]\n\
ror.l #8,%[u32]\n\
":[u32]"+d"(u32)::"cc");
return(u32);
}
/*
 * Reverse the four bytes of a 32-bit value (m68k inline assembler).
 * "rol.w #8" swaps the two bytes of the low word, "swap" exchanges the two
 * 16-bit halves and the second "rol.w #8" swaps the bytes of the word that
 * is now in the low half. u32 is a read/write data-register operand ("+d").
 */
static __inline__ __CONST_FUNC uint32_t __swap32(uint32_t u32) {
__asm__("\
rol.w #8,%[u32]\n\
swap %[u32]\n\
rol.w #8,%[u32]\n\
":[u32]"+d"(u32)::"cc");
return(u32);
}
/*
 * Reverse the eight bytes of a 64-bit value (m68k inline assembler).
 * Both 32-bit halves of the operand are byte-reversed with the same
 * rol.w / swap / rol.w sequence that __swap32() uses, and the final "exg"
 * exchanges the two halves.
 *
 * The second operand of "exg" has to be written %L[u64], like on every
 * other line of this sequence; "L%[u64]" would emit a literal 'L' in front
 * of the first register's name instead of naming the second register.
 *
 * NOTE(review): %[u64] and %L[u64] presumably name the two data registers
 * that hold the 64-bit operand - confirm against the GCC m68k operand
 * modifiers.
 */
static __inline__ __CONST_FUNC uint64_t __swap64(uint64_t u64) {
__asm__("\
rol.w #8,%[u64]\n\
rol.w #8,%L[u64]\n\
swap %[u64]\n\
swap %L[u64]\n\
rol.w #8,%[u64]\n\
rol.w #8,%L[u64]\n\
exg %[u64],%L[u64]\n\
":[u64]"+d"(u64)::"cc");
return(u64);
}
#else
/*
 * Unknown or undefined architecture. Perhaps compiling with "-strict -ansi", but should not use this header then anyway.
 *
 * No inline assembler is available here: constant arguments still go to
 * the plain C helpers (__const_swapNN), everything else calls the
 * out-of-line library function of the same name. A macro is not expanded
 * again inside its own replacement list, so the inner bswapNN(x) is an
 * ordinary function call.
 * The whole expansion is parenthesized so that the conditional operator
 * cannot capture neighbouring operators, e.g. in "bswap16(v) - 1" or
 * "(char)bswap16(v)".
 */
#undef bswap16
#undef bswap24
#undef bswap32
#undef bswap64
#define bswap16(x) (__builtin_constant_p(x)?__const_swap16(x):bswap16(x))
#define bswap24(x) (__builtin_constant_p(x)?__const_swap24(x):bswap24(x))
#define bswap32(x) (__builtin_constant_p(x)?__const_swap32(x):bswap32(x))
#define bswap64(x) (__builtin_constant_p(x)?__const_swap64(x):bswap64(x))
#endif
/* C implementations for constant values */
/* Plain C byte swap of a 16-bit value; used for compile-time constants. */
static __inline__ uint16_t __const_swap16(uint16_t u16) {
	const unsigned int upper = u16 >> 8;      /* old high byte, now low  */
	const unsigned int lower = u16 & 0xffu;   /* old low byte, goes high */

	return (uint16_t)((lower << 8) | upper);
}
/*
 * Plain C reversal of the three low-order bytes of a 32-bit value; used for
 * compile-time constants. Bits 24..31 of the result are always zero.
 */
static __inline__ uint32_t __const_swap24(uint32_t u32) {
	const uint32_t low  = u32 & 0xff;
	const uint32_t mid  = u32 & 0xff00;
	const uint32_t high = (u32 >> 16) & 0xff;

	return (low << 16) | mid | high;
}
/* Plain C reversal of all four bytes of a 32-bit value; used for compile-time constants. */
static __inline__ uint32_t __const_swap32(uint32_t u32) {
	const uint32_t byte0 = u32 & 0xff;           /* least significant byte */
	const uint32_t byte1 = (u32 >> 8) & 0xff;
	const uint32_t byte2 = (u32 >> 16) & 0xff;
	const uint32_t byte3 = u32 >> 24;            /* most significant byte  */

	return (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3;
}
/* Plain C reversal of all eight bytes of a 64-bit value; used for compile-time constants. */
static __inline__ uint64_t __const_swap64(uint64_t u64) {
	uint64_t reversed = 0;
	int remaining;

	/* Peel bytes off the low end of the input and push them onto the result. */
	for (remaining = 8; remaining > 0; remaining--) {
		reversed = (reversed << 8) | (u64 & 0xff);
		u64 >>= 8;
	}
	return reversed;
}
#endif /* __GNUC__ */
#endif /* __BYTESWAP_H */
/* vi:set ts=3: */

View File

@ -0,0 +1,28 @@
#if defined(__PPC__) && defined(__GNUC__)
/*
 * bswap16() for PowerPC, emitted as a top-level assembler block.
 * The routine works on r3 only: rlwimi inserts bits 8..15 of r3 rotated
 * left by 16 back into r3, srwi then shifts r3 right by 8, and blr returns.
 * NOTE(review): presumably r3 carries both the argument and the return
 * value under the PowerPC calling convention - confirm.
 * The three lines starting with '#' inside the string are assembler
 * comments holding an alternative sequence that goes through r4.
 */
asm("\
.text\n\
.align 2\n\
.globl bswap16\n\
.type bswap16, @function\n\
bswap16:\n\
# rlwinm %r4,%r3,8,16,24\n\
# rlwimi %r4,%r3,24,24,31\n\
# or %r3,%r4,%r4\n\
rlwimi %r3,%r3,16,8,15\n\
srwi %r3,%r3,8\n\
blr\n\
");
#else
#include <stdint.h>
/*
 * Portable fallback: exchange the two bytes of a 16-bit value.
 * Returns the byte-swapped value.
 */
uint16_t bswap16(uint16_t u16)
{
	const unsigned int low_byte = u16 & 0xffu;
	const unsigned int high_byte = u16 >> 8;

	return (uint16_t)((low_byte << 8) | high_byte);
}
#endif

View File

@ -0,0 +1,29 @@
/*
 * bswap24(): reverse the three low-order bytes of a 32-bit value. The top
 * byte of the result is always zero.
 *
 * PowerPC/GCC builds use a top-level assembler routine, everything else
 * uses the portable C version.
 *
 * Assembler version: rlwinm rotates r3 left by 16 and keeps bits 8..31 in
 * r4, which moves the low byte up to bits 8..15 and bits 8..15 down to the
 * low byte. rlwimi then copies the middle byte (bits 16..23) from r3 into
 * r4 and the result is moved back to r3.
 *  - The entry label has to match the ".globl bswap24" / ".type bswap24"
 *    directives; a label named "bswap32" would leave bswap24 undefined and
 *    define a second bswap32 instead.
 *  - The rlwimi mask must end at bit 23: the MB/ME range is inclusive, so
 *    16..24 would also copy the top bit of the source's low byte into the
 *    top bit of the result's low byte.
 */
#if defined(__PPC__) && defined(__GNUC__)
asm(" .text\n\
.align 2\n\
.globl bswap24\n\
.type bswap24, @function\n\
bswap24:\n\
rlwinm %r4,%r3,16,8,31\n\
rlwimi %r4,%r3,0,16,23\n\
or %r3,%r4,%r4\n\
blr\n\
");
#else
#include <stdint.h>
uint32_t bswap24(uint32_t u32)
{
	return(
		((u32&0xff)<<16)|	/* low byte moves to bits 16..23 */
		((u32&0xff00))|		/* middle byte stays in place */
		((u32&0xff0000)>>16)	/* bits 16..23 move to the low byte */
	);
}
#endif

View File

@ -0,0 +1,31 @@
#if defined(__PPC__) && defined(__GNUC__)
/*
 * bswap32() for PowerPC, emitted as a top-level assembler block.
 * rlwinm rotates r3 left by 8 and keeps bits 8..31 in r4; the two rlwimi
 * instructions insert bits 0..7 and bits 16..23 of r3 rotated left by 24.
 * The result is copied from r4 back to r3 ("or" with identical sources)
 * before blr.
 * NOTE(review): presumably r3 carries both the argument and the return
 * value under the PowerPC calling convention - confirm.
 */
asm(" .text\n\
.align 2\n\
.globl bswap32\n\
.type bswap32, @function\n\
bswap32:\n\
rlwinm %r4,%r3,8,8,31\n\
rlwimi %r4,%r3,24,0,7\n\
rlwimi %r4,%r3,24,16,23\n\
or %r3,%r4,%r4\n\
blr\n\
");
#else
#include <stdint.h>
/*
 * Portable fallback: reverse the four bytes of a 32-bit value.
 * Returns the byte-reversed value.
 */
uint32_t bswap32(uint32_t u32)
{
	const uint32_t byte0 = u32 & 0xff;           /* least significant byte */
	const uint32_t byte1 = (u32 >> 8) & 0xff;
	const uint32_t byte2 = (u32 >> 16) & 0xff;
	const uint32_t byte3 = u32 >> 24;            /* most significant byte  */

	return (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3;
}
#endif

View File

@ -0,0 +1,48 @@
#if defined(USE_64_BIT_INTS)
#if defined(__PPC__) && defined(__GNUC__)
/*
 * bswap64() for PowerPC, emitted as a top-level assembler block.
 * The byte-reversed r3 is built in r5, the byte-reversed r4 is built
 * directly in r3, and r5 is finally copied into r4 - i.e. both words are
 * byte-reversed and exchanged.
 * NOTE(review): presumably the 64-bit argument and return value occupy the
 * r3/r4 register pair under the PowerPC calling convention - confirm.
 */
asm(" .text\n\
.align 2\n\
.globl bswap64\n\
.type bswap64, @function\n\
bswap64:\n\
rlwinm %r5,%r3,8,8,31\n\
rlwimi %r5,%r3,24,0,7\n\
rlwimi %r5,%r3,24,16,23\n\
rlwinm %r3,%r4,8,8,31\n\
rlwimi %r3,%r4,24,0,7\n\
rlwimi %r3,%r4,24,16,23\n\
or %r4,%r5,%r5\n\
blr\n\
");
#else
#include <stdint.h>
/* Reverse the four bytes of one 32-bit word (helper for bswap64()). */
static uint32_t bswap64_reverse_word(uint32_t word)
{
	return ((word & 0xff) << 24) |
	       ((word & 0xff00) << 8) |
	       ((word >> 8) & 0xff00) |
	       (word >> 24);
}

/*
 * Portable fallback: reverse the eight bytes of a 64-bit value.
 * The value is viewed as two 32-bit words; each word is byte-reversed and
 * the two words trade places.
 * Returns the byte-reversed value.
 */
uint64_t bswap64(uint64_t u64)
{
	union {
		uint64_t whole;
		uint32_t half[2];
	} value;
	uint32_t first_half;

	value.whole = u64;
	first_half = value.half[0];	/* saved before half[0] is overwritten */
	value.half[0] = bswap64_reverse_word(value.half[1]);
	value.half[1] = bswap64_reverse_word(first_half);
	return value.whole;
}
#endif
#endif

View File

@ -0,0 +1,70 @@
#if defined(__GNUC__) && defined(__PPC__)
/*
 * swab() for PowerPC, emitted as a top-level assembler block.
 * r3=from, r4=to, r5=len/count, r6=index, r7=load/store/temp
 *
 * Outline, following the labels in the code:
 *  - the byte count in r5 is halved to a count of 16-bit words; the routine
 *    goes straight to .exit when that count is not greater than zero;
 *  - when the low two bits of "from" are not zero, one 16-bit word is
 *    swapped on its own (lhbrx/sthx) before the main loop;
 *  - .loop and .oddloop swap two 16-bit words per iteration by loading a
 *    byte-reversed 32-bit word (lwbrx) and rotating it by 16 bits;
 *  - .oddloop is taken for an odd word count and finishes with one more
 *    lhbrx/sthx pair;
 *  - "to" is copied into r3 before each blr.
 *
 * NOTE(review): .preploop loads CTR with r5>>1 without testing it for zero,
 * and the tail of .oddloop adjusts the index with "sub %r6,%r6,2" before
 * the final lhbrx - verify the behaviour for very short buffers and whether
 * an immediate subtract was intended.
 */
asm("\
.text\n\
.align 2\n\
.globl swab\n\
.type swab,@function\n\
swab:\n\
dcbt 0,%r3\n\
srawi. %r5,%r5,1\n\
bc 4,gt,.exit\n\
andi. %r7,%r3,3 # Check if we start on an address evenly divisible by 4.\n\
li %r6,0\n\
bc 4,gt,.preploop\n\
lhbrx %r7,%r6,%r3 # Fix alignment if needed.\n\
sthx %r7,%r6,%r4\n\
addi %r6,%r6,2\n\
subi %r5,%r5,1\n\
.preploop:\n\
andi. %r7,%r5,1 # Check if even or odd number of 16-bit words.\n\
srawi %r5,%r5,1 # Number of 32-bit words to half-swap.\n\
mtctr %r5\n\
bc 12,gt,.oddloop # Jump to loop for odd number of 16-bit words.\n\
.loop: # Loop is 'unrolled' by reading/writing 32-bit words.\n\
lwbrx %r7,%r6,%r3\n\
rotlwi %r7,%r7,16\n\
stwx %r7,%r6,%r4\n\
addi %r6,%r6,4\n\
bc 0,lt,.loop\n\
.exit:\n\
or %r3,%r4,%r4\n\
blr\n\
.oddloop:\n\
lwbrx %r7,%r6,%r3\n\
rotlwi %r7,%r7,16\n\
stwx %r7,%r6,%r4\n\
addi %r6,%r6,4\n\
bc 0,lt,.oddloop\n\
sub %r6,%r6,2\n\
lhbrx %r7,%r6,%r3 # Fix last 16-bit word.\n\
sthx %r7,%r6,%r4\n\
or %r3,%r4,%r4\n\
blr\n\
");
#else
#include <sys/types.h>
#include <stdint.h>
/*
 * Portable fallback: copy "len" bytes from "from" to "to", exchanging the
 * two bytes of every 16-bit word on the way.
 *
 *   from - source buffer
 *   to   - destination buffer; may be the same as "from" (in-place swap)
 *   len  - length in bytes; a trailing odd byte is ignored and a length
 *          below 2 (including negative values) does nothing
 *
 * Returns the "to" pointer.
 *
 * The buffers are accessed one byte at a time, so they do not have to be
 * 16-bit aligned, and the index has the same type as "len" so that long
 * buffers cannot overflow it.
 */
void *swab(void *from,void *to,ssize_t len)
{
	const unsigned char *src=from;
	unsigned char *dst=to;
	unsigned char first,second;
	ssize_t i;

	for(i=0;i+1<len;i+=2) {
		/* Read both bytes before writing so that from==to works. */
		first=src[i];
		second=src[i+1];
		dst[i]=second;
		dst[i+1]=first;
	}
	return(to);
}
#endif

View File

@ -0,0 +1,91 @@
#if defined(__GNUC__) && defined(__PPC__)
/*
 * swab24() for PowerPC, emitted as a top-level assembler block.
 * r3=from, r4=to, r5=len/remaining, r6/r7=index & r7=temp, r8/r9/r10=read/write temp
 *
 * Outline, following the labels in the code:
 *  - the byte count in r5 is divided by 3 (divwu) to get the number of
 *    24-bit words; that number modulo 4 is kept in r5 for .postfix and the
 *    number divided by 4 is loaded into CTR;
 *  - .loop reads three 32-bit words (12 bytes) per iteration, two of them
 *    byte-reversed with lwbrx, recombines them with rotate/insert
 *    instructions and writes three 32-bit words back;
 *  - .postfix/.fixloop handle the words left over, one byte at a time:
 *    three bytes are loaded with lbzu and stored at offsets 2, 1 and 0;
 *  - "to" is copied into r3 before blr.
 *
 * NOTE(review): the byte shuffling inside .loop has not been traced bit by
 * bit for this comment - verify against test vectors before relying on it.
 */
asm("\
.text\n\
.align 2\n\
.globl swab24\n\
.type swab24,@function\n\
swab24:\n\
dcbt 0,%r3\n\
li %r7,3\n\
divwu %r5,%r5,%r7\n\
andi. %r7,%r5,3\n\
srawi. %r5,%r5,2\n\
mtctr %r5\n\
or %r5,%r7,%r7\n\
li %r6,0\n\
bc 4,gt,.postfix\n\
.loop:\n\
lwbrx %r8,%r6,%r3\n\
addi %r7,%r6,4\n\
lwzx %r9,%r7,%r3\n\
addi %r7,%r6,8\n\
lwbrx %r10,%r7,%r3\n\
rotlwi %r8,%r8,8\n\
or %r7,%r9,%r9\n\
rlwimi %r9,%r8,16,8,15\n\
rlwimi %r9,%r10,8,16,23\n\
rlwimi %r8,%r7,16,24,31\n\
rotrwi %r10,%r10,8\n\
rlwimi %r10,%r7,16,0,7\n\
stwx %r8,%r6,%r4\n\
addi %r6,%r6,4\n\
stwx %r9,%r6,%r4\n\
addi %r6,%r6,4\n\
stwx %r10,%r6,%r4\n\
addi %r6,%r6,4\n\
bc 0,lt,.loop\n\
.postfix: # Fix any remaining 24-bit words (number of remaining words in r5).\n\
or. %r5,%r5,%r5\n\
bc 4,gt,.exit\n\
mtctr %r5\n\
add %r3,%r3,%r6\n\
add %r6,%r4,%r6\n\
subi %r3,%r3,1\n\
.fixloop:\n\
lbzu %r7,1(%r3)\n\
lbzu %r8,1(%r3)\n\
lbzu %r9,1(%r3)\n\
stb %r7,2(%r6)\n\
stb %r8,1(%r6)\n\
stb %r9,0(%r6)\n\
addi %r6,%r6,3\n\
bc 0,lt,.fixloop\n\
.exit:\n\
or %r3,%r4,%r4\n\
blr\n\
");
#else
#include <sys/types.h>
#include <stdint.h>
/*
 * Portable fallback: copy "len" bytes from "from" to "to", reversing the
 * byte order of every 24-bit (3-byte) word on the way.
 *
 * This is not a fast implementation, but it is simple, understandable and
 * safe.
 *
 *   from - source buffer
 *   to   - destination buffer; may be the same as "from" (in-place swap)
 *   len  - length in bytes; only complete 3-byte words are processed, so up
 *          to two trailing bytes are left untouched, and a length below 3
 *          (including negative values) does nothing
 *
 * Returns the "to" pointer.
 */
void *swab24(void *from,void *to,ssize_t len)
{
	const uint8_t *src=from;
	uint8_t *dst=to;
	uint8_t B0,B1,B2;
	ssize_t i;

	/* "i+2<len" stops before a partial word, so nothing past len is touched. */
	for(i=0;i+2<len;i+=3) {
		/* Read the whole word before writing so that from==to works. */
		B0=src[i];
		B1=src[i+1];
		B2=src[i+2];
		dst[i]=B2;
		dst[i+1]=B1;
		dst[i+2]=B0;
	}
	return(to);
}
#endif

View File

@ -0,0 +1,112 @@
#if defined(__GNUC__) && defined(__PPC__)
/*
 * swab32() for PowerPC, emitted as a top-level assembler block.
 * r3=from, r4=to, r5=len, r6=index, r7=load/store temp
 *
 * The byte count in r5 is shifted right by 2 to get the number of 32-bit
 * words; when that is not greater than zero the routine goes straight to
 * .exit. Otherwise the count is loaded into CTR and .loop copies one word
 * per iteration, loading it byte-reversed (lwbrx) and storing it as is
 * (stwx). "to" is copied into r3 before blr.
 */
asm("\
.text\n\
.align 2\n\
.globl swab32\n\
.type swab32,@function\n\
swab32:\n\
srawi. %r5,%r5,2\n\
li %r6,0\n\
bc 4,gt,.exit\n\
mtctr %r5\n\
.loop:\n\
lwbrx %r7,%r6,%r3\n\
stwx %r7,%r6,%r4\n\
addi %r6,%r6,4\n\
bc 0,lt,.loop\n\
.exit:\n\
or %r3,%r4,%r4\n\
blr\n\
");
/* r3=from, r4=to, r5=len/temp, r6=index, r7=load/store temp, r8=cache hint
*
* The unrolled, cache-hinting version appears to be about 4.5% faster, but
* in this case I opted for the smaller implementation. swab64() appears to
* gain more from cache-hinting - probably because of it using more registers
* for intermediate storage.
asm("\
.text\n\
.align 2\n\
.globl swab32\n\
.type swab32,@function\n\
swab32:\n\
dcbt 0,%r3\n\
andi. %r8,%r5,31 # The number of bytes handled in '.pre'. Used for prefetch hint.\n\
srawi %r5,%r5,2 # Convert bytes-># of 32-bit words\n\
andi. %r7,%r5,7\n\
li %r6,0\n\
bc 4,gt,.preploop\n\
mtctr %r7\n\
.pre: # One 32-bit word at a time until we have (nLeft%8)==0 \n\
lwbrx %r7,%r6,%r3\n\
stwx %r7,%r6,%r4\n\
addi %r6,%r6,4\n\
bc 0,lt,.pre\n\
.preploop:\n\
srawi. %r5,%r5,3 # Divide by 8 again to get number of loops.\n\
addi %r8,%r8,32 # Start address for next loop (from r3).\n\
bc 4,gt,.exit\n\
mtctr %r5\n\
.loop: # Loop unrolled 8 times = 32 bytes = 1 cache-line (except on the 970).\n\
dcbt %r8,%r3 # Cache hint (prefetch) for the next loop\n\
lwbrx %r7,%r6,%r3\n\
stwx %r7,%r6,%r4\n\
addi %r6,%r6,4\n\
lwbrx %r7,%r6,%r3\n\
stwx %r7,%r6,%r4\n\
addi %r6,%r6,4\n\
lwbrx %r7,%r6,%r3\n\
stwx %r7,%r6,%r4\n\
addi %r6,%r6,4\n\
lwbrx %r7,%r6,%r3\n\
stwx %r7,%r6,%r4\n\
addi %r6,%r6,4\n\
lwbrx %r7,%r6,%r3\n\
stwx %r7,%r6,%r4\n\
addi %r6,%r6,4\n\
lwbrx %r7,%r6,%r3\n\
stwx %r7,%r6,%r4\n\
addi %r6,%r6,4\n\
lwbrx %r7,%r6,%r3\n\
stwx %r7,%r6,%r4\n\
addi %r6,%r6,4\n\
lwbrx %r7,%r6,%r3\n\
stwx %r7,%r6,%r4\n\
addi %r6,%r6,4\n\
addi %r8,%r8,32 # Update cache-hint offset\n\
bc 0,lt,.loop\n\
.exit:\n\
or %r3,%r4,%r4\n\
blr\n\
");
*/
#else
#include <sys/types.h>
#include <stdint.h>
/*
 * Portable fallback: copy "len" bytes from "from" to "to", reversing the
 * byte order of every 32-bit word on the way.
 *
 *   from - source buffer
 *   to   - destination buffer; may be the same as "from" (in-place swap)
 *   len  - length in bytes; only complete 4-byte words are processed, and a
 *          length below 4 (including negative values) does nothing
 *
 * Returns the "to" pointer.
 *
 * The buffers are accessed one byte at a time, so they do not have to be
 * 32-bit aligned, and the index has the same type as "len" so that long
 * buffers cannot overflow it.
 */
void *swab32(void *from,void *to,ssize_t len)
{
	const uint8_t *src=from;
	uint8_t *dst=to;
	uint8_t b0,b1,b2,b3;
	ssize_t i;

	for(i=0;i+3<len;i+=4) {
		/* Read the whole word before writing so that from==to works. */
		b0=src[i];
		b1=src[i+1];
		b2=src[i+2];
		b3=src[i+3];
		dst[i]=b3;
		dst[i+1]=b2;
		dst[i+2]=b1;
		dst[i+3]=b0;
	}
	return(to);
}
#endif

View File

@ -0,0 +1,101 @@
#if defined(__GNUC__) && defined(__PPC__)
/*
 * swab64() for PowerPC, emitted as a top-level assembler block.
 * r3=from, r4=to, r5=len/temp, r6/r7=index, r8/r9=load/store temp, r10=cache hint
 */
/* This version is unrolled and uses cache-hinting. It appears to gain about 10%
 * over a non-unrolled, non-hinting version.
 *
 * Outline, following the labels in the code:
 *  - the byte count in r5 is shifted right by 3 to get the number of 64-bit
 *    words;
 *  - .pre handles (number of words modulo 4) words one at a time: both
 *    32-bit halves are loaded byte-reversed (lwbrx) and stored at each
 *    other's offset;
 *  - .loop handles the remaining words four per iteration (32 bytes) and
 *    issues a dcbt prefetch at offset r10 at the top of every iteration;
 *  - "to" is copied into r3 before blr.
 */
asm("\
.text\n\
.align 2\n\
.globl swab64\n\
.type swab64,@function\n\
swab64:\n\
dcbt 0,%r3\n\
andi. %r10,%r5,31 # The number of bytes handled in '.pre'. Used for prefetch hint.\n\
srawi %r5,%r5,3 # Convert bytes-># of 64-bit words\n\
andi. %r7,%r5,3\n\
li %r6,0\n\
bc 4,gt,.preploop\n\
mtctr %r7\n\
.pre: # One 64-bit word at a time until we have (nLeft%4)==0 \n\
lwbrx %r8,%r6,%r3\n\
addi %r7,%r6,4\n\
lwbrx %r9,%r7,%r3\n\
stwx %r8,%r7,%r4\n\
stwx %r9,%r6,%r4\n\
addi %r6,%r6,8\n\
bc 0,lt,.pre\n\
.preploop:\n\
srawi. %r5,%r5,2 # Divide by 4 again to get number of loops.\n\
addi %r10,%r10,32 # Start address for next loop.\n\
bc 4,gt,.exit\n\
mtctr %r5\n\
.loop: # Loop unrolled 4 times = 32 bytes = 1 cache-line (except on the 970).\n\
dcbt %r10,%r3 # Cache hint (prefetch) for the next iteration\n\
lwbrx %r8,%r6,%r3\n\
addi %r7,%r6,4\n\
lwbrx %r9,%r7,%r3\n\
stwx %r8,%r7,%r4\n\
stwx %r9,%r6,%r4\n\
addi %r6,%r6,8\n\
lwbrx %r8,%r6,%r3\n\
addi %r7,%r6,4\n\
lwbrx %r9,%r7,%r3\n\
stwx %r8,%r7,%r4\n\
stwx %r9,%r6,%r4\n\
addi %r6,%r6,8\n\
lwbrx %r8,%r6,%r3\n\
addi %r7,%r6,4\n\
lwbrx %r9,%r7,%r3\n\
stwx %r8,%r7,%r4\n\
stwx %r9,%r6,%r4\n\
addi %r6,%r6,8\n\
lwbrx %r8,%r6,%r3\n\
addi %r7,%r6,4\n\
lwbrx %r9,%r7,%r3\n\
stwx %r8,%r7,%r4\n\
stwx %r9,%r6,%r4\n\
addi %r6,%r6,8\n\
addi %r10,%r10,32 # Update cache-hint offset\n\
bc 0,lt,.loop\n\
.exit:\n\
or %r3,%r4,%r4\n\
blr\n\
");
#else
#include <sys/types.h>
#include <stdint.h>
/*
 * Portable fallback: copy "len" bytes from "from" to "to", reversing the
 * byte order of every 64-bit word on the way.
 *
 *   from - source buffer
 *   to   - destination buffer; may be the same as "from" (in-place swap)
 *   len  - length in bytes; only complete 8-byte words are processed, and a
 *          length below 8 (including negative values) does nothing
 *
 * Returns the "to" pointer.
 *
 * The buffers are accessed one byte at a time, so they do not have to be
 * aligned, and the index has the same type as "len" so that long buffers
 * cannot overflow it.
 */
void *swab64(void *from,void *to,ssize_t len)
{
	const uint8_t *src=from;
	uint8_t *dst=to;
	uint8_t word[8];
	ssize_t i;
	int k;

	for(i=0;i+7<len;i+=8) {
		/* Read the whole word before writing so that from==to works. */
		for(k=0;k<8;k++) {
			word[k]=src[i+k];
		}
		for(k=0;k<8;k++) {
			dst[i+k]=word[7-k];
		}
	}
	return(to);
}
#endif
/* vi:set ts=3: */