![]() |
SH4ZAM! 0.1.0
Fast math library for the Sega Dreamcast's SH4 CPU
|


Go to the source code of this file.
Macros | |
Barriers | |
Macros for preventing GCC from reordering instructions. | |
| #define | SHZ_MEMORY_BARRIER_SOFT() |
| #define | SHZ_MEMORY_BARRIER_HARD() |
Functions | |
| void | shz_dcache_alloc_line (void *src) SHZ_NOEXCEPT |
| void * | shz_memcpy (void *SHZ_RESTRICT dst, const void *SHZ_RESTRICT src, size_t bytes) SHZ_NOEXCEPT |
Specializations | |
Specialized routines for specific sizes + alignments. | |
| void * | shz_memcpy1 (void *SHZ_RESTRICT dst, const void *SHZ_RESTRICT src, size_t bytes) SHZ_NOEXCEPT |
| void * | shz_memcpy2 (void *SHZ_RESTRICT dst, const void *SHZ_RESTRICT src, size_t bytes) SHZ_NOEXCEPT |
| void * | shz_memcpy4 (void *SHZ_RESTRICT dst, const void *SHZ_RESTRICT src, size_t bytes) SHZ_NOEXCEPT |
| void * | shz_memcpy8 (void *SHZ_RESTRICT dst, const void *SHZ_RESTRICT src, size_t bytes) SHZ_NOEXCEPT |
| void * | shz_memset8 (void *dst, uint64_t value, size_t bytes) SHZ_NOEXCEPT |
| void * | shz_memcpy32 (void *SHZ_RESTRICT dst, const void *SHZ_RESTRICT src, size_t bytes) SHZ_NOEXCEPT |
| void * | shz_sq_memcpy32 (void *SHZ_RESTRICT dst, const void *SHZ_RESTRICT src, size_t bytes) SHZ_NOEXCEPT |
| void * | shz_sq_memcpy32_xmtrx (void *SHZ_RESTRICT dst, const void *SHZ_RESTRICT src, size_t bytes) SHZ_NOEXCEPT |
| void * | shz_memcpy64 (void *SHZ_RESTRICT dst, const void *SHZ_RESTRICT src, size_t bytes) SHZ_NOEXCEPT |
| void * | shz_memcpy128 (void *SHZ_RESTRICT dst, const void *SHZ_RESTRICT src, size_t bytes) SHZ_NOEXCEPT |
Constant-sized Operations | |
Specialized routines for operating on statically sized buffers. | |
| void | shz_memcpy2_16 (void *SHZ_RESTRICT dst, const void *SHZ_RESTRICT src) SHZ_NOEXCEPT |
| void | shz_memset2_16 (void *dst, uint16_t value) SHZ_NOEXCEPT |
| void | shz_memcpy4_16 (void *SHZ_RESTRICT dst, const void *SHZ_RESTRICT src) SHZ_NOEXCEPT |
| void | shz_memcpy32_1 (void *SHZ_RESTRICT dst, const void *SHZ_RESTRICT src) SHZ_NOEXCEPT |
| void | shz_memswap32_1 (void *SHZ_RESTRICT p1, void *SHZ_RESTRICT p2) SHZ_NOEXCEPT |
| void | shz_memswap32_1_xmtrx (void *SHZ_RESTRICT p1, void *SHZ_RESTRICT p2) SHZ_NOEXCEPT |
| void * | shz_sq_memcpy32_1 (void *SHZ_RESTRICT dst, const void *SHZ_RESTRICT src) SHZ_NOEXCEPT |
| void * | shz_sq_memcpy32_1_xmtrx (void *SHZ_RESTRICT dst, const void *SHZ_RESTRICT src) SHZ_NOEXCEPT |
Memory API.
API built around copying, assigning, and working with memory.
Definition in file shz_mem.h.
| #define SHZ_MEMORY_BARRIER_SOFT | ( | ) |
| #define SHZ_MEMORY_BARRIER_HARD | ( | ) |
| void shz_dcache_alloc_line | ( | void * | src | ) |
Intrinsic around the SH4 MOVCA.L instruction.
Preallocates the cache-line containing src.
Zero-initializes all 32 bytes within the src cache-line, setting the valid bit to 1.
| void * shz_memcpy | ( | void *SHZ_RESTRICT | dst, |
| const void *SHZ_RESTRICT | src, | ||
| size_t | bytes ) |
Generic drop-in fast memcpy() replacement.
Copies bytes from src to dst, determining the most efficient specialization to call into at run-time, returning dst.
There are no alignment or size requirements for this routine.
If you know the src and dst alignments and batch sizes, you can micro-optimize by calling into the most specific memcpy() specialization for your given scenario, rather than using this generic implementation, which must choose which one to use at run-time.
The dst and src buffers should not overlap.
| void * shz_memcpy1 | ( | void *SHZ_RESTRICT | dst, |
| const void *SHZ_RESTRICT | src, | ||
| size_t | bytes ) |
Copies an unaligned buffer to another one byte at a time.
The dst pointer is returned.
The dst and src buffers should not overlap.
| void * shz_memcpy2 | ( | void *SHZ_RESTRICT | dst, |
| const void *SHZ_RESTRICT | src, | ||
| size_t | bytes ) |
Copies from one 2-byte aligned buffer to another two bytes at a time.
The dst pointer is returned.
dst and src must both be aligned by at least 2 bytes, and bytes must be a multiple of 2.
The dst and src buffers should not overlap.
| void * shz_memcpy4 | ( | void *SHZ_RESTRICT | dst, |
| const void *SHZ_RESTRICT | src, | ||
| size_t | bytes ) |
Copies from one 4-byte aligned buffer to another, 4 bytes at a time.
The dst pointer is returned.
dst and src must both be aligned by at least 4 bytes, and bytes must be a multiple of 4.
The dst and src buffers should not overlap.
| void * shz_memcpy8 | ( | void *SHZ_RESTRICT | dst, |
| const void *SHZ_RESTRICT | src, | ||
| size_t | bytes ) |
Copies from one 8-byte aligned buffer to another, 8 bytes at a time.
The dst pointer is returned.
dst and src must both be aligned by at least 8 bytes, and bytes must be a multiple of 8.
The src and dst buffers should not overlap.
| void * shz_memset8 | ( | void * | dst, |
| uint64_t | value, | ||
| size_t | bytes ) |
Assigns the given 8-byte value to the bytes in dst.
dst should be at least 8-byte aligned, and bytes should be a multiple of 8!
| void * shz_memcpy32 | ( | void *SHZ_RESTRICT | dst, |
| const void *SHZ_RESTRICT | src, | ||
| size_t | bytes ) |
Copies bytes from the src to the dst buffer in 32-byte chunks.
Transfers from an 8-byte aligned buffer, src, to a 32-byte aligned buffer, dst, 32 bytes at a time. Returns the dst address.
dst must be 32-byte aligned, while src can be only 8-byte aligned. bytes must be a multiple of 32.
The src and dst buffers must not overlap.
| void * shz_sq_memcpy32 | ( | void *SHZ_RESTRICT | dst, |
| const void *SHZ_RESTRICT | src, | ||
| size_t | bytes ) |
Copies bytes from src to dst in 32-byte chunks, using the Store Queues.
Transfers from an 8-byte aligned buffer, src, to a 4-byte aligned address, dst, 32 bytes at a time, writing through the cache, using the SH4's Store Queues. Returns the dst address.
src must be at least 8-byte aligned, while dst can be only 4-byte aligned. bytes must be a multiple of 32.
[…] dst before initiating the copy... Which means this routine becomes slower than doing memory-to-memory copies with shz_memcpy32().
| void * shz_sq_memcpy32_xmtrx | ( | void *SHZ_RESTRICT | dst, |
| const void *SHZ_RESTRICT | src, | ||
| size_t | bytes ) |
Copies bytes from src to dst in 32-byte chunks, using the Store Queues and XMTRX.
Equivalent to shz_sq_memcpy32(), except copying is done through XMTRX.
| void * shz_memcpy64 | ( | void *SHZ_RESTRICT | dst, |
| const void *SHZ_RESTRICT | src, | ||
| size_t | bytes ) |
Specialized memcpy() variant for copying multiples of 64 bytes.
Copies from an 8-byte aligned buffer to a 32-byte aligned buffer, 64 bytes at a time. Returns the dst address.
The src and dst buffers must not overlap.
dst must be 32-byte aligned, while src can be only 8-byte aligned. bytes must be a multiple of 64.
| void * shz_memcpy128 | ( | void *SHZ_RESTRICT | dst, |
| const void *SHZ_RESTRICT | src, | ||
| size_t | bytes ) |
Specialized memcpy() variant for copying multiples of 128 bytes.
Copies from an 8-byte aligned buffer to a 32-byte aligned buffer, 128 bytes at a time. Returns the dst address.
The src and dst buffers must not overlap.
dst must be 32-byte aligned, while src can be only 8-byte aligned. bytes must be a multiple of 128.
| void shz_memcpy2_16 | ( | void *SHZ_RESTRICT | dst, |
| const void *SHZ_RESTRICT | src ) |
Copies 16 shorts from src to dst.
The src and dst buffers must not overlap.
dst and src must both be aligned by at least two bytes.
| void shz_memset2_16 | ( | void * | dst, |
| uint16_t | value ) |
Sets the values of the 16 shorts pointed to by dst to the given value.
dst must be aligned by at least two bytes.
| void shz_memcpy4_16 | ( | void *SHZ_RESTRICT | dst, |
| const void *SHZ_RESTRICT | src ) |
Copies sixteen 4-byte (long) values from src to dst.
The src and dst buffers must not overlap.
The src and dst buffers must both be at least 4-byte aligned.
| void shz_memcpy32_1 | ( | void *SHZ_RESTRICT | dst, |
| const void *SHZ_RESTRICT | src ) |
Copies 32 bytes from src to dst as a single chunk.
dst must be 32-byte aligned, while src can be only 8-byte aligned.
| void shz_memswap32_1 | ( | void *SHZ_RESTRICT | p1, |
| void *SHZ_RESTRICT | p2 ) |
Swaps the values within the given 32-byte buffers.
p1 and p2 must be at least 8-byte aligned.
| void shz_memswap32_1_xmtrx | ( | void *SHZ_RESTRICT | p1, |
| void *SHZ_RESTRICT | p2 ) |
Swaps the values within the given 32-byte buffers, using XMTRX.
Equivalent to shz_memswap32_1(), except swapping is done through XMTRX.
| void * shz_sq_memcpy32_1 | ( | void *SHZ_RESTRICT | dst, |
| const void *SHZ_RESTRICT | src ) |
Copies src to dst in a single 32-byte transaction using the Store Queues.
dst must be at least 4-byte aligned, while src must be at least 8-byte aligned.
| void * shz_sq_memcpy32_1_xmtrx | ( | void *SHZ_RESTRICT | dst, |
| const void *SHZ_RESTRICT | src ) |
Copies src to dst in a single 32-byte transaction using the Store Queues and XMTRX.
Equivalent to shz_sq_memcpy32_1(), except copying is done through XMTRX.