SH4ZAM! 0.1.0
Fast math library for the Sega Dreamcast's SH4 CPU
Loading...
Searching...
No Matches
shz_scalar.h
Go to the documentation of this file.
1/*! \file
2 * \brief General-purpose scalar math routines.
3 * \ingroup scalar
4 *
5 * This file provides a collection of general-purpose math routines for
6 * operating on floating-point scalar values.
7 *
8 * \todo
9 * - Use FP rounding modes for rounding functionality.
10 *
11 * \author 2025, 2026 Falco Girgis
12 * \author 2025, 2026 Paul Cercueil
13 * \author 2026 Aleios
14 * \author 2026 jnmartin64
15 *
16 * \copyright MIT License
17 */
18
19#ifndef SHZ_SCALAR_H
20#define SHZ_SCALAR_H
21
22#include <math.h>
23#include <stdbool.h>
24
25#include "shz_cdefs.h"
26
27/*! \defgroup scalar Scalar
28 \brief Scalar functions and utilities.
29
30 This API is designed around performing various scalar operations, providing
31 alternatives for routines typically found within `<math.h>`.
32
33 \warning
34 Unlike the standard C floating-point routines, these routines are often saving
35 cycles by not handling NaN and INF values as well as by not reporting rounding
36 and domain errors back to the user.
37*/
38
39//! Floating-point epsilon used with inexact FP-based comparisons.
40#define SHZ_FLT_EPSILON 0.000001f
41
42SHZ_DECLS_BEGIN
43
44/*! \name Comparisons
45 \brief Routines for comparing and classifying floating-point values.
46 @{
47*/
48
49//! Returns the minimum value of two given floats.
50SHZ_FORCE_INLINE float shz_fminf(float a, float b) SHZ_NOEXCEPT;
51
52//! Returns the maximum value of two given floats.
53SHZ_FORCE_INLINE float shz_fmaxf(float a, float b) SHZ_NOEXCEPT;
54
55//! Checks for equality based on EITHER the absolute tolerance or relative tolerance, using SHZ_FLT_EPSILON.
56SHZ_FORCE_INLINE bool shz_equalf(float a, float b) SHZ_NOEXCEPT;
57
58//! Checks for equality based on the absolute tolerance using SHZ_FLT_EPSILON.
59SHZ_FORCE_INLINE bool shz_equalf_abs(float a, float b) SHZ_NOEXCEPT;
60
61//! Checks for equality based on the relative tolerance using SHZ_FLT_EPSILON.
62SHZ_FORCE_INLINE bool shz_equalf_rel(float a, float b) SHZ_NOEXCEPT;
63
64//! @}
65
66/*! \name Rounding
67 \brief Routines for rounding and manipulating floats.
68 @{
69 */
70
71/*! Replacement for the <math.h> routine, floorf().
72
73 Returns the closest integral value to \p x, rounded down, as a float.
74
75 \warning
76 This routine is only valid for the range INT32_MIN <= \p x <= INT32_MAX.
77
78 \sa shz_ceilf()
79*/
80SHZ_FORCE_INLINE float shz_floorf(float x) SHZ_NOEXCEPT;
81
82/*! Replacement for the <math.h> routine, ceilf().
83
84 Returns the closest integral value to \p x, rounded up, as a float.
85
86 \warning
87 This routine only returns valid values for the input range
88 INT32_MIN <= \p x <= INT32_MAX.
89
90 \sa shz_floorf()
91*/
92SHZ_FORCE_INLINE float shz_ceilf(float x) SHZ_NOEXCEPT;
93
94/*! Replacement for the <math.h> routine, roundf().
95
96 Returns the value of \p x rounded to the nearest integer, as a float.
97
98 \warning
99 This routine only returns valid values for the input range
100 -UINT32_MAX <= \p x <= UINT32_MAX.
101*/
102SHZ_INLINE float shz_roundf(float x) SHZ_NOEXCEPT;
103
104/*! Replacement for the <math.h> routine, truncf().
105
106 Returns the value of \p x with its fractional component discarded.
107
108 \warning
109 This routine only returns valid values for the input range
110 INT32_MIN <= \p x <= INT32_MAX.
111*/
112SHZ_FORCE_INLINE float shz_truncf(float x) SHZ_NOEXCEPT;
113
114/*! Replacement for the <math.h> routine, remainderf().
115
116 Returns the floating-point remainder of \p num divided by \p denom,
117 rounded to the nearest integer (as a float).
118
119 \warning
120 This routine does not gracefully handle dividing by zero, and it is only
121 valid for the input range INT32_MIN <= \p num / \p denom <= INT32_MAX.
122
123 \sa shz_fmodf(), shz_remquof(), shz_truncf().
124*/
125SHZ_FORCE_INLINE float shz_remainderf(float num, float denom) SHZ_NOEXCEPT;
126
127/*! Replacement for the <math.h> routine, fmodf().
128
129 Returns the floating-point remainder of \p num divided by \p denom,
130 rounded towards zero.
131
132 \warning
133 This routine does not gracefully handle dividing by zero.
134
135 \sa shz_remainderf()
136*/
137SHZ_FORCE_INLINE float shz_fmodf(float num, float denom) SHZ_NOEXCEPT;
138
139/*! (Sorta) Replacement for the <math.h> routine, remquof().
140
141 Returns the floating-point remainder of \p num divided by \p denom,
142 rounded to the nearest integer (as a float). \p quot is set equal to
143 the quotient which is used as part of the calculation.
144
145 \note
146 \p quot is returned as a `float` rather than an `int` as with standard C.
147 Simply cast to an `int` manually afterwards if that is the desired behavior.
148
149 \warning
150 This routine does not gracefully handle dividing by zero.
151
152 \sa shz_remainderf()
153*/
154SHZ_FORCE_INLINE float shz_remquof(float num, float denom, float* quot) SHZ_NOEXCEPT;
155
156//! @}
157
158/*! \name Mapping
159 \brief Routines for mapping a number to another range.
160 @{
161*/
162
163//! Clamps a floating-point value by the given \p min and \p max values.
164SHZ_FORCE_INLINE float shz_clampf(float value, float min, float max) SHZ_NOEXCEPT;
165
166//! Maps a value within the given range \p from to \p to, to be within the range of `0.0f` to `1.0f`.
167SHZ_FORCE_INLINE float shz_normalizef(float current, float from, float to) SHZ_NOEXCEPT;
168
169//! Maps a value within the given range \p from to \p to, to be within the range of `0.0f` to `1.0f` more quickly, provided \p to - \p from is a positive difference.
170SHZ_FORCE_INLINE float shz_normalizef_fsrra(float current, float from, float to) SHZ_NOEXCEPT;
171
172//! Maps a value within the given range \p inputStart to \p inputEnd, to be within the range of \p outputStart to \p outputEnd.
173SHZ_FORCE_INLINE float shz_remapf(float value, float inputStart, float inputEnd, float outputStart, float outputEnd) SHZ_NOEXCEPT;
174
175//! Maps a value within the given range \p inputStart to \p inputEnd, to be within the range of \p outputStart to \p outputEnd more quickly, provided the \p outputEnd - \p outputStart is a positive difference.
176SHZ_FORCE_INLINE float shz_remapf_fsrra(float value, float inputStart, float inputEnd, float outputStart, float outputEnd) SHZ_NOEXCEPT;
177
178//! Wraps the given \p value back to be within the range of \p min to \p max.
179SHZ_FORCE_INLINE float shz_wrapf(float value, float min, float max) SHZ_NOEXCEPT;
180
181//! Wraps the given \p value back to be within the range of \p min to \p max more quickly, provided \p max - \p min is a positive difference.
182SHZ_FORCE_INLINE float shz_wrapf_fsrra(float value, float min, float max) SHZ_NOEXCEPT;
183
184
185//! Returns the fractional part of \p x, equivalent to GLSL `fract()`.
186SHZ_FORCE_INLINE float shz_fractf(float x) SHZ_NOEXCEPT;
187
188//! Returns -1.0f if \p x < 0, 0.0f if \p x == 0, or 1.0f if \p x > 0.
189SHZ_FORCE_INLINE float shz_signf(float x) SHZ_NOEXCEPT;
190
191//! Clamps \p x to the range [0.0f, 1.0f].
192SHZ_FORCE_INLINE float shz_saturatef(float x) SHZ_NOEXCEPT;
193
194//! @}
195
196/*! \name Miscellaneous
197 * \brief Assorted routines implementing other fp operations.
198 * @{
199 */
200
201/*! Replacement for the <math.h> routine, fabsf().
202
203 Returns the absolute value of \p x.
204*/
205SHZ_FORCE_INLINE float shz_fabsf(float x) SHZ_NOEXCEPT;
206
207/*! Replacement for the <math.h> routine, copysignf().
208
209 Returns the value of \p x with the sign of \p y.
210*/
211SHZ_FORCE_INLINE float shz_copysignf(float x, float y) SHZ_NOEXCEPT;
212
213/*! Replacement for the <math.h> routine, fmaf().
214
215 Returns \p a * \p b + \p c, performing an FP multiply + accumulate operation.
216*/
217SHZ_FORCE_INLINE float shz_fmaf(float a, float b, float c) SHZ_NOEXCEPT;
218
219/*! Replacement for the <math.h> routine, fdimf().
220
221 Returns the positive difference between \p x and \p y or zero if
222 y >= x.
223
224 \warning
225 Unlike fdimf(), this routine does not handle INF and NAN values.
226*/
227SHZ_FORCE_INLINE float shz_fdimf(float x, float y) SHZ_NOEXCEPT;
228
229/*! Replacement for the <math.h> routine, hypotf().
230
231 Returns the hypotenuse of the right triangle with the given legs.
232
233 \warning
234 Unlike hypotf(), this routine has no error or overflow handling.
235*/
236SHZ_FORCE_INLINE float shz_hypotf(float x, float y) SHZ_NOEXCEPT;
237
238/*! Replacement for the <math.h> routine, cbrtf().
239
240 Returns the cube root of \p x, using a fast approximation.
241*/
242SHZ_FORCE_INLINE float shz_cbrtf(float x) SHZ_NOEXCEPT;
243
244//! Returns a value that is linearly interpolated between \p a and \p b by the given ratio, \p t.
245SHZ_FORCE_INLINE float shz_lerpf(float a, float b, float t) SHZ_NOEXCEPT;
246
247//! Returns a value that is barycentrically interpolated between \p a, \p b, and \p c using the given barycentric coordinates, \p u and \p v.
248SHZ_FORCE_INLINE float shz_barycentric_lerpf(float a, float b, float c, float u, float v) SHZ_NOEXCEPT;
249
250//! Uses the quadratic formula with the given coefficients to solve for the two roots, returning `true` if any real roots exist, and `false` if the roots are only imaginary.
251SHZ_FORCE_INLINE bool shz_quadratic_roots(float a, float b, float c, float* root1, float* root2) SHZ_NOEXCEPT;
252
253//! Returns a random floating-point number between `0.0f` and `1.0f`, using and updating the given seed.
254SHZ_FORCE_INLINE float shz_randf(int* seed) SHZ_NOEXCEPT;
255
256//! Returns a random floating-point number between \p min and \p max, using and updating the given seed.
257SHZ_FORCE_INLINE float shz_randf_range(int* seed, float min, float max) SHZ_NOEXCEPT;
258
259//! Returns 0.0f if \p x < \p edge, otherwise 1.0f.
260SHZ_FORCE_INLINE float shz_stepf(float x, float edge) SHZ_NOEXCEPT;
261
262//! Returns 0.0f at/below edge0, 1.0f at/above edge1, smoothly varying in-between. edge0 must be less than edge1 or result is undefined.
263SHZ_FORCE_INLINE float shz_smoothstepf(float x, float edge0, float edge1) SHZ_NOEXCEPT;
264
265//! Returns 0.0f at/below edge0, 1.0f at/above edge1, smoothly varying in-between. Accepts inverse edges.
266SHZ_FORCE_INLINE float shz_smoothstepf_safe(float x, float edge0, float edge1) SHZ_NOEXCEPT;
267
268//! @}
269
270/*! \name FSRRA
271 * \brief Routines built around fast reciprocal square root instruction.
272 * @{
273 */
274
275//! Calculates 1.0f/sqrtf( \p x), using a fast approximation.
276SHZ_FORCE_INLINE float shz_inv_sqrtf_fsrra(float x) SHZ_NOEXCEPT;
277
278//! Calculates 1.0f/sqrtf( \p x ), using a fast approximation, while safely protecting against division-by-zero.
279SHZ_FORCE_INLINE float shz_inv_sqrtf(float x) SHZ_NOEXCEPT;
280
281//! Returns the fast approximate square root of the given value, \p x.
282SHZ_FORCE_INLINE float shz_sqrtf_fsrra(float x) SHZ_NOEXCEPT;
283
284//! Returns the fast approximate square root of the given value, \p x, safely returning `0.0f` if \p x == `0.0f`.
285SHZ_FORCE_INLINE float shz_sqrtf(float x) SHZ_NOEXCEPT;
286
287//! Takes the inverse of \p x using a very fast approximation, returning a positive result.
288SHZ_FORCE_INLINE float shz_invf_fsrra(float x) SHZ_NOEXCEPT;
289
290//! Takes the inverse of \p x using a slightly faster approximation than doing a full division, safely handling negative values.
291SHZ_FORCE_INLINE float shz_invf(float x) SHZ_NOEXCEPT;
292
293//! Divides \p num by \p denom using a very fast approximation, which requires \p denom be a positive value.
294SHZ_FORCE_INLINE float shz_divf_fsrra(float num, float denom) SHZ_NOEXCEPT;
295
296//! Divides \p num by \p denom using a slightly faster approximation, allowing \p denom to be negative.
297SHZ_FORCE_INLINE float shz_divf(float num, float denom) SHZ_NOEXCEPT;
298
299//! @}
300
301/*! \name FIPR
302 * \brief Routines built around fast 4D dot product.
303 * \todo Generalize these with a macro that lets you choose vector FP regs.
304 * @{
305 */
306
307//! Takes two sets of 3D vectors as 3 floats and calculates their dot product using an approximation.
308SHZ_FORCE_INLINE float shz_dot6f(float x1, float y1, float z1,
309 float x2, float y2, float z2) SHZ_NOEXCEPT;
310
311//! Takes two sets of 4D vectors as 4 floats and calculates their dot product using an approximation.
312SHZ_FORCE_INLINE float shz_dot8f(float x1, float y1, float z1, float w1,
313 float x2, float y2, float z2, float w2) SHZ_NOEXCEPT;
314
315//! Takes a 3D vector as 3 floats and calculates its squared magnitude using a fast approximation.
316SHZ_FORCE_INLINE float shz_mag_sqr3f(float x, float y, float z) SHZ_NOEXCEPT;
317
318//! Takes a 4D vector as 4 floats and calculates its squared magnitude using a fast approximation.
319SHZ_FORCE_INLINE float shz_mag_sqr4f(float x, float y, float z, float w) SHZ_NOEXCEPT;
320
321//! @}
322
323/*! \name Transcendental
324 \brief Fast approximations for non-trig transcendental functions.
325 @{
326*/
327
328//! Fast approximation for raising 2 to a floating-point power.
329SHZ_FORCE_INLINE float shz_pow2f(float p) SHZ_NOEXCEPT;
330
331//! Fast approximation for C's powf().
332SHZ_FORCE_INLINE float shz_powf(float x, float p) SHZ_NOEXCEPT;
333
334//! Fast approximation of POSIX's pow10f().
335SHZ_FORCE_INLINE float shz_pow10f(float x) SHZ_NOEXCEPT;
336
337//! Fast approximation for C's log2f().
338SHZ_FORCE_INLINE float shz_log2f(float x) SHZ_NOEXCEPT;
339
340//! Fast approximation for C's logf().
341SHZ_FORCE_INLINE float shz_logf(float x) SHZ_NOEXCEPT;
342
343//! Fast approximation for C's log10f().
344SHZ_FORCE_INLINE float shz_log10f(float x) SHZ_NOEXCEPT;
345
346//! Fast approximation for C's expf().
347SHZ_FORCE_INLINE float shz_expf(float p) SHZ_NOEXCEPT;
348
349//! @}
350
351#include "inline/shz_scalar.inl.h"
352
353SHZ_DECLS_END
354
355#endif // SHZ_SCALAR_H
float shz_mag_sqr3f(float x, float y, float z) SHZ_NOEXCEPT
Takes a 3D vector as 3 floats and calculates its squared magnitude using a fast approximation.
float shz_powf(float x, float p) SHZ_NOEXCEPT
Fast approximation for C's powf().
bool shz_equalf_abs(float a, float b) SHZ_NOEXCEPT
Checks for equality based on the absolute tolerance using SHZ_FLT_EPSILON.
float shz_remquof(float num, float denom, float *quot) SHZ_NOEXCEPT
(Sorta) Replacement for the <math.h> routine, remquof().
float shz_expf(float p) SHZ_NOEXCEPT
Fast approximation for C's expf().
float shz_normalizef_fsrra(float current, float from, float to) SHZ_NOEXCEPT
Maps a value within the given range from to to, to be within the range of 0.0f + 1....
float shz_randf(int *seed) SHZ_NOEXCEPT
Returns a random floating-point number between 0.0f and 1.0f, using and updating the given seed.
float shz_log10f(float x) SHZ_NOEXCEPT
Fast approximation for C's log10f().
float shz_floorf(float x) SHZ_NOEXCEPT
Replacement for the <math.h> routine, floorf().
float shz_wrapf_fsrra(float value, float min, float max) SHZ_NOEXCEPT
Wraps the given value back to be within the range of min to max more quickly, provided max - min is a...
float shz_normalizef(float current, float from, float to) SHZ_NOEXCEPT
Maps a value within the given range from to to, to be within the range of 0.0f += 1....
float shz_log2f(float x) SHZ_NOEXCEPT
Fast approximation for C's log2f().
float shz_mag_sqr4f(float x, float y, float z, float w) SHZ_NOEXCEPT
Takes a 4D vector as 4 floats and calculates its squared magnitude using a fast approximation.
float shz_smoothstepf(float x, float edge0, float edge1) SHZ_NOEXCEPT
Returns 0.0f at/below edge0, 1.0f at/above edge1, smoothly varying in-between. edge0 must be less tha...
float shz_dot8f(float x1, float y1, float z1, float w1, float x2, float y2, float z2, float w2) SHZ_NOEXCEPT
Takes two sets of 4D vectors as 4 floats and calculates their dot product using an approximation.
float shz_fmaxf(float a, float b) SHZ_NOEXCEPT
Returns the maximum value of two given floats.
float shz_signf(float x) SHZ_NOEXCEPT
Returns -1.0f if x < 0, 0.0f if x == 0, or 1.0f if x > 0.
float shz_fdimf(float x, float y) SHZ_NOEXCEPT
Replacement for the <math.h> routine, fdimf().
float shz_sqrtf_fsrra(float x) SHZ_NOEXCEPT
Returns the fast approximate square root of the given value, x.
float shz_pow10f(float x) SHZ_NOEXCEPT
Fast approximation of POSIX's pow10f().
bool shz_quadratic_roots(float a, float b, float c, float *root1, float *root2) SHZ_NOEXCEPT
Uses the quadratic formula with the given coefficients to solve for the two roots,...
float shz_dot6f(float x1, float y1, float z1, float x2, float y2, float z2) SHZ_NOEXCEPT
Takes two sets of 3D vectors as 3 floats and calculates their dot product using an approximation.
float shz_ceilf(float x) SHZ_NOEXCEPT
Replacement for the <math.h> routine, ceilf().
float shz_remapf_fsrra(float value, float inputStart, float inputEnd, float outputStart, float outputEnd) SHZ_NOEXCEPT
Maps a value within the given range inputStart to inputEnd, to be within the range of outputStart to ...
float shz_hypotf(float x, float y) SHZ_NOEXCEPT
Replacement for the <math.h> routine, hypotf().
float shz_copysignf(float x, float y) SHZ_NOEXCEPT
Replacement for the <math.h> routine, copysignf().
float shz_invf(float x) SHZ_NOEXCEPT
Takes the inverse of x using a slightly faster approximation than doing a full division,...
float shz_wrapf(float value, float min, float max) SHZ_NOEXCEPT
Wraps the given value back to be within the range of min to max.
float shz_saturatef(float x) SHZ_NOEXCEPT
Clamps x to the range [0.0f, 1.0f].
float shz_truncf(float x) SHZ_NOEXCEPT
Replacement for the <math.h> routine, truncf().
float shz_invf_fsrra(float x) SHZ_NOEXCEPT
Takes the inverse of x using a very fast approximation, returning a positive result.
float shz_sqrtf(float x) SHZ_NOEXCEPT
Returns the fast approximate square root of the given value, x, safely returning 0....
float shz_fmaf(float a, float b, float c) SHZ_NOEXCEPT
Replacement for the <math.h> routine, fmaf().
float shz_barycentric_lerpf(float a, float b, float c, float u, float v) SHZ_NOEXCEPT
Returns a value that is barycentrically interpolated between a, b, and c using the given barycentric ...
bool shz_equalf(float a, float b) SHZ_NOEXCEPT
Checks for equality based on EITHER the absolute tolerance or relative tolerance, using SHZ_FLT_EPSIL...
float shz_smoothstepf_safe(float x, float edge0, float edge1) SHZ_NOEXCEPT
Returns 0.0f at/below edge0, 1.0f at/above edge1, smoothly varying in-between. Accepts inverse edges.
float shz_inv_sqrtf_fsrra(float x) SHZ_NOEXCEPT
Calculates 1.0f/sqrtf( x), using a fast approximation.
float shz_cbrtf(float x) SHZ_NOEXCEPT
Replacement for the <math.h> routine, cbrtf().
bool shz_equalf_rel(float a, float b) SHZ_NOEXCEPT
Checks for equality based on the relative tolerance using SHZ_FLT_EPSILON.
float shz_fractf(float x) SHZ_NOEXCEPT
Returns the fractional part of x, equivalent to GLSL fract().
float shz_fmodf(float num, float denom) SHZ_NOEXCEPT
Replacement for the <math.h> routine, fmodf().
float shz_lerpf(float a, float b, float t) SHZ_NOEXCEPT
Returns a value that is linearly interpolated between a and b by the given ratio, t.
float shz_remainderf(float num, float denom) SHZ_NOEXCEPT
Replacement for the <math.h> routine, remainderf().
float shz_divf_fsrra(float num, float denom) SHZ_NOEXCEPT
Divides num by denom using a very fast approximation, which requires denom be a positive value.
float shz_divf(float num, float denom) SHZ_NOEXCEPT
Divides num by denom using a slightly faster approximation, allowing denom to be negative.
float shz_fminf(float a, float b) SHZ_NOEXCEPT
Returns the minimum value of two given floats.
float shz_inv_sqrtf(float x) SHZ_NOEXCEPT
Calculates 1.0f/sqrtf( x ), using a fast approximation, while safely protecting against division-by-z...
float shz_randf_range(int *seed, float min, float max) SHZ_NOEXCEPT
Returns a random floating-point number between min and max, using and updating the given seed.
float shz_clampf(float value, float min, float max) SHZ_NOEXCEPT
Clamps a floating-point value by the given min and max values.
float shz_roundf(float x) SHZ_NOEXCEPT
Replacement for the <math.h> routine, roundf().
float shz_fabsf(float x) SHZ_NOEXCEPT
Replacement for the <math.h> routine, fabsf().
float shz_logf(float x) SHZ_NOEXCEPT
Fast approximation for C's logf().
float shz_remapf(float value, float inputStart, float inputEnd, float outputStart, float outputEnd) SHZ_NOEXCEPT
Maps a value within the given range inputStart to inputEnd, to be within the range of outputStart to ...
float shz_stepf(float x, float edge) SHZ_NOEXCEPT
Returns 0.0f if x < edge, otherwise 1.0f.
float shz_pow2f(float p) SHZ_NOEXCEPT
Fast approximation for raising 2 to a floating-point power.