Schnelle Sinus/Cosinus-Berechnung für Embedded-Anwendungen in C
Fast sine/cosine calculation for embedded application in C
In some embedded applications you need to calculate trigonometric functions,
especially if "something mechanical has to turn". A well known problem of the
floating point sin()
function is, that it takes - compared to other tasks -
a while to compute, moreover the calculation time varies a lot. This variation
is caused by the altorighm used, which iteratively evaluates a (Taylor) series
until the error of the output value reaches a break-threshold.
Engineers / Embedded programmers, especially in the signal/control field,
dislike long and varying calculation times, simply because they have to
ensure that the output for the next cycle can be provided in time. As the
Accuracy of hardware (ADC,DAC/PWM, connected electronics) often has the
highest impact on the accuracy, it is often not sensible to calculate trigonometric
functions "to the last bit of a double
".
"Not excact. Accurate enough to fit the purpose - but quickly, please."
This Example demonstrates the implementation of sine function as occasionally
used in the embedded field. The difference to the standard (math.h
) sin
,
is the processing time and the accuracy. It calculates the ouput in a few
clock cycles using interpolated table lookup. 66 bytes of contant memory are
required. The error (compared to the floating point sine) is less than 0.05%.
In der Embeddedprogrammierung werden hin und wieder trigonometrische Funktionen benötigt, vor allem "wenn sich dabei etwas Mechanisches drehen soll". Dabei stößt man des Öfteren auf das Problem, dass die Berechnung des Sinus/Cosinus im Vergleich zu anderen Aufgaben merklich Zeit beansprucht und des Weiteren abhängig von den Werten unterschiedlich lange dauern kann. Dies rührt von der Tatsache her, dass die Funktion einen Algorithmus durchrechnet, bei dem die Größe des verbleibenden Restfehlers ermittelt werden kann (Taylorreihe). Dies wird so oft wiederholt bis die Genauigkeit ausreichend ist.
Bei Ingenieuren, vor allem in der Regelungstechnik, sind länger dauernde Rechnungen und variable Zeiten unbeliebt, denn dies erhöht das Risiko bis zum nächsten Takt das benötigte Ergebnis nicht bereitstellen zu können. Da die Genauigkeit der Werte oftmals in erster Linie durch die Hardware (DAC/PWM-Kanäle und die Elekronik dahinter) begrenzt, so dass eine Berechnung von trigonometrischen Funktionen "auf's letzte double-Bit" keinen Sinn ergibt.
"Nicht genau. Genau genug, dass ich keine Probleme bekomme. Aber bitte schnell."
Dieses Beipiel zeigt die Implementation der Sinus-Funktion wie sie in Embedded
Anwendungen benötigt wird. Der Untschied zum Standard-Fließkomma-sin
der
Math-Library besteht in der Genauigkeit und der Rechendauer. Sie berechnet
die Ausgabe in kurzer Zeit durch Tabellen-Lookup und Interpolation in
Integerarithmetik. Dabei benötigt sie 66 Bytes für die Tabellen. Der Fehler
bei der verwendeten Tabellengröße liegt dabei unterhalb einem halben Promill.
Die Erkäuterungen wie die Funktion arbeitet habe ich hier hin ausgelagert
Header
/**
* Example for a interpolated sine/cosine table lookup
*
* @file sin1.h
* @author stfwi
*
*/
#ifndef __SW__SIN1_H__
#define __SW__SIN1_H__
#ifdef __cplusplus
extern "C" {
#endif
#include <sys/types.h>
/**
* Sine calculation using interpolated table lookup.
* Instead of radiants or degrees we use "turns" here. Means this
* sine does NOT return one phase for 0 to 2*PI, but for 0 to 1.
* Input: -1 to 1 as int16 Q15 == -32768 to 32767.
* Output: -1 to 1 as int16 Q15 == -32768 to 32767.
*
* @param int16_t angle Q15
* @return int16_t Q15
*/
int16_t sin1(int16_t angle);
/**
* Cosine calculation using interpolated table lookup.
* Instead of radiants or degrees we use "turns" here. Means this
* cosine does NOT return one phase for 0 to 2*PI, but for 0 to 1.
* Input: -1 to 1 as int16 Q15 == -32768 to 32767.
* Output: -1 to 1 as int16 Q15 == -32768 to 32767.
*
* @param int16_t angle Q15
* @return int16_t Q15
*/
int16_t cos1(int16_t angle);
#ifdef __cplusplus
}
#endif
#endif
Implemation
/**
* Example for a sine/cosine table lookup
* Implementation of sin1() / cos1().
* We "outsource" this implementation so that the precompiler constants/macros
* are only defined here.
*
* @file sin1.c
* @author stfwi
**/
#include "sin1.h"
/*
* The number of bits of our data type: here 16 (sizeof operator returns bytes).
*/
#define INT16_BITS (8 * sizeof(int16_t))
#ifndef INT16_MAX
#define INT16_MAX ((1<<(INT16_BITS-1))-1)
#endif
/*
* "5 bit" large table = 32 values. The mask: all bit belonging to the table
* are 1, the all above 0.
*/
#define TABLE_BITS (5)
#define TABLE_SIZE (1<<TABLE_BITS)
#define TABLE_MASK (TABLE_SIZE-1)
/*
* The lookup table is to 90DEG, the input can be -360 to 360 DEG, where negative
* values are transformed to positive before further processing. We need two
* additional bits (*4) to represent 360 DEG:
*/
#define LOOKUP_BITS (TABLE_BITS+2)
#define LOOKUP_MASK ((1<<LOOKUP_BITS)-1)
#define FLIP_BIT (1<<TABLE_BITS)
#define NEGATE_BIT (1<<(TABLE_BITS+1))
#define INTERP_BITS (INT16_BITS-1-LOOKUP_BITS)
#define INTERP_MASK ((1<<INTERP_BITS)-1)
/**
* "5 bit" lookup table for the offsets. These are the sines for exactly
* at 0deg, 11.25deg, 22.5deg etc. The values are from -1 to 1 in Q15.
*/
static int16_t sin90[TABLE_SIZE+1] = {
0x0000,0x0647,0x0c8b,0x12c7,0x18f8,0x1f19,0x2527,0x2b1e,
0x30fb,0x36b9,0x3c56,0x41cd,0x471c,0x4c3f,0x5133,0x55f4,
0x5a81,0x5ed6,0x62f1,0x66ce,0x6a6c,0x6dc9,0x70e1,0x73b5,
0x7640,0x7883,0x7a7c,0x7c29,0x7d89,0x7e9c,0x7f61,0x7fd7,
0x7fff
};
/**
* Sine calculation using interpolated table lookup.
* Instead of radiants or degrees we use "turns" here. Means this
* sine does NOT return one phase for 0 to 2*PI, but for 0 to 1.
* Input: -1 to 1 as int16 Q15 == -32768 to 32767.
* Output: -1 to 1 as int16 Q15 == -32768 to 32767.
*
* See the full description at www.AtWillys.de for the detailed
* explanation.
*
* @param int16_t angle Q15
* @return int16_t Q15
*/
int16_t sin1(int16_t angle)
{
int16_t v0, v1;
if(angle < 0) { angle += INT16_MAX; angle += 1; }
v0 = (angle >> INTERP_BITS);
if(v0 & FLIP_BIT) { v0 = ~v0; v1 = ~angle; } else { v1 = angle; }
v0 &= TABLE_MASK;
v1 = sin90[v0] + (int16_t) (((int32_t) (sin90[v0+1]-sin90[v0]) * (v1 & INTERP_MASK)) >> INTERP_BITS);
if((angle >> INTERP_BITS) & NEGATE_BIT) v1 = -v1;
return v1;
}
/**
* Cosine calculation using interpolated table lookup.
* Instead of radiants or degrees we use "turns" here. Means this
* cosine does NOT return one phase for 0 to 2*PI, but for 0 to 1.
* Input: -1 to 1 as int16 Q15 == -32768 to 32767.
* Output: -1 to 1 as int16 Q15 == -32768 to 32767.
*
* @param int16_t angle Q15
* @return int16_t Q15
*/
int16_t cos1(int16_t angle)
{
if(angle < 0) { angle += INT16_MAX; angle += 1; }
return sin1(angle - (int16_t)(((int32_t)INT16_MAX * 270) / 360));
}
Beispielprogramm
Example program
/**
* Example for a sine/cosine table lookup
*
* (main file that uses sin1.h/sin1.c)
*
* @file sinlookup.c
* @author stfwi
*
*/
#include "sin1.h"
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <sys/types.h>
/**
* Generate the lookup tables for sin1() in a way that the text can be pasted
* into a C source code.
*
* @return void
*/
void generate_tables()
{
#define Q15 (1.0/(double)((1<<15)-1))
#define TABLE_SIZE (1<<5)
#define SCALER ((M_PI/2.0) / TABLE_SIZE)
int i;
printf("static int16_t sin90_offset[TABLE_SIZE+1] = {\n ");
for(i=0; i < TABLE_SIZE; i++) {
printf("0x%04x%s", (int16_t) (sin(SCALER * i) / Q15), (i%8!=7) ? "," : ",\n ");
}
printf("0x7fff\n};\n");
}
/**
* Run a test comparing the real floating point sine with the sin1 function.
* Output CSV: "ANGLE in DEG", "REAL SINE", "SIN1", "ERROR".
*
* @return void
*/
void run()
{
double angle;
printf("%6s, %8s, %8s, %6s\n", "angle", "sin", "sin1", "error");
for(angle=0; angle<360; angle+=0.1) {
double lookup_sine = sin1(angle * 32768.0 / 360.0) * Q15;
double real_sine = sin(angle * 2*M_PI / 360.0);
double sine_error = real_sine - lookup_sine;
printf("%6.1f, %+8.5f, %+8.5f, %+8.6f\n", angle, real_sine, lookup_sine, sine_error);
}
}
/**
* Test main function
*
* @param int argc
* @param char **argv
* @return int
*/
int main(int argc, char** argv)
{
if(argc < 2) {
fprintf(stderr, "Usage %s -g : Create lookup tables\n", argv[0]);
fprintf(stderr, " %s <int angle> : Test a number (0 to 32767)\n", argv[0]);
fprintf(stderr, " %s -r : Run iterated numbers test\n", argv[0]);
return 1;
} else if(!strcmp(argv[1], "-g")) {
generate_tables();
} else if(!strcmp(argv[1], "-r")) {
run();
} else if(!isnan(atof(argv[1]))) {
long l = (long) atof(argv[1]);
while(l < 0x0000) l += 0x8000;
while(l >= 0x7fff) l -= 0x8000;
printf("%d\n", (int) sin1(l));
}
return 0;
}
Makefile
all: sinlookup generate run
clean:
@rm -f *.o
@rm -f *.s
@rm -f *.i
sinlookup: sin1.c sinlookup.c
gcc -g -Wall -O3 -save-temps sin1.c sinlookup.c -o sinlookup -lm
@rm -f *.o
@rm -f *.i
run: sinlookup
@echo
./sinlookup -r
generate: sinlookup
@echo
./sinlookup -g
Beispiel-Ausgabe
Example program output
./sinlookup -g
int16_t sin90_offset[TABLE_SIZE] = {
0x0000,0x0647,0x0c8b,0x12c7,0x18f8,0x1f19,0x2527,0x2b1e,
0x30fb,0x36b9,0x3c56,0x41cd,0x471c,0x4c3f,0x5133,0x55f4,
0x5a81,0x5ed6,0x62f1,0x66ce,0x6a6c,0x6dc9,0x70e1,0x73b5,
0x7640,0x7883,0x7a7c,0x7c29,0x7d89,0x7e9c,0x7f61,0x7fd7
};
int16_t sin90_1_diff[TABLE_SIZE] = {
0x0647,0x0643,0x063c,0x0630,0x0621,0x060e,0x05f7,0x05dc,
0x05be,0x059c,0x0577,0x054e,0x0522,0x04f3,0x04c1,0x048c,
0x0454,0x041a,0x03dd,0x039e,0x035c,0x0318,0x02d3,0x028b,
0x0242,0x01f8,0x01ac,0x0160,0x0112,0x00c4,0x0076,0x0027
};
./sinlookup -r
angle, sin, sin1, error
0.0, +0.00000, +0.00000, +0.000000
0.1, +0.00175, +0.00171, +0.000036
0.2, +0.00349, +0.00342, +0.000073
[...]
359.6, -0.00698, -0.00687, -0.000115
359.7, -0.00524, -0.00516, -0.000078
359.8, -0.00349, -0.00342, -0.000073
359.9, -0.00175, -0.00171, -0.000036
Dissasembly
Just to show how small the optimised code is, here the dissassembly for 64 bit intel and ARM:
Dissasembly amd64
Dissasembly amd64
Nur um kurz darzustellen, dass die optimierte Funktion recht klein ist, hier die Dissasemblierung für Intel/AMD 64 und ARM:
Dissasembly amd64
sin1:
.LFB3:
leal -32768(%rdi), %eax
testw %di, %di
cmovs %eax, %edi
movl %edi, %eax
sarw $8, %ax
movswl %ax, %edx
testb $32, %dl
je .L3
notl %eax
notl %edi
.L3:
andl $31, %eax
movzbl %dil, %edi
movslq %eax, %rcx
addl $1, %eax
movzwl sin90(%rcx,%rcx), %ecx
cltq
movswl sin90(%rax,%rax), %eax
movswl %cx, %esi
subl %esi, %eax
imull %edi, %eax
sarl $8, %eax
addl %ecx, %eax
movl %eax, %ecx
negl %ecx
andl $64, %edx
cmovne %ecx, %eax
ret
[...]
sin90:
.value 0
.value 1607
[...]
.value 32727
.value 32767
Dissasembly ARM (Cortex-M3)
[...]
sin1:
cmp r0, #0
itt lt
sublt r0, r0, #32768
sxthlt r0, r0
ubfx r3, r0, #8, #16
uxth r2, r3
lsls r1, r2, #26
push {r4}
bpl .L3
mvns r3, r3
mvns r0, r0
uxth r3, r3
uxth r0, r0
.L3:
ldr r4, .L9
and r3, r3, #31
ldrh r1, [r4, r3, lsl #1]
adds r3, r3, #1
ldrsh r4, [r4, r3, lsl #1]
sxth r3, r1
subs r3, r4, r3
uxtb r0, r0
mul r0, r0, r3
add r0, r1, r0, asr #8
uxth r0, r0
lsls r3, r2, #25
it mi
negmi r0, r0
uxth r0, r0
sxth r0, r0
pop {r4}
bx lr
.L10:
.align 2
.L9:
.word .LANCHOR0
[...]
.LANCHOR0 = . + 0
.type sin90, %object
.size sin90, 66
sin90:
.short 0
.short 1607
[...]
.short 32767