ngscopeclient 0.1-dev+51fbda87c
avx_mathfun.h
1/*
2 AVX implementation of sin, cos, sincos, exp and log
3
4 Based on "sse_mathfun.h", by Julien Pommier
5 http://gruntthepeon.free.fr/ssemath/
6
7 Copyright (C) 2012 Giovanni Garberoglio
8 Interdisciplinary Laboratory for Computational Science (LISC)
9 Fondazione Bruno Kessler and University of Trento
10 via Sommarive, 18
11 I-38123 Trento (Italy)
12
13 This software is provided 'as-is', without any express or implied
14 warranty. In no event will the authors be held liable for any damages
15 arising from the use of this software.
16
17 Permission is granted to anyone to use this software for any purpose,
18 including commercial applications, and to alter it and redistribute it
19 freely, subject to the following restrictions:
20
21 1. The origin of this software must not be misrepresented; you must not
22 claim that you wrote the original software. If you use this software
23 in a product, an acknowledgment in the product documentation would be
24 appreciated but is not required.
25 2. Altered source versions must be plainly marked as such, and must not be
26 misrepresented as being the original software.
27 3. This notice may not be removed or altered from any source distribution.
28
29 (this is the zlib license)
30
31 Modified by A. Zonenberg:
32 * Added convenient nicknames to make these functions fit the Intel intrinsic naming scheme used by IPP
33 * Added __attribute__((target("avx2"))) to each function for use in mixed environments
34 * Added function prototypes
35 * Removed no-AVX2 functionality since we only use these functions if AVX2 is present
36 * Fixed _PS256_CONST
37 * Moved constants inside functions so we can handle compiling without -mavx2
38 * Moved a bunch of implementation stuff to a source file
39*/
40
41#ifdef __x86_64__
42
43#include <immintrin.h>
44
45/* yes I know, the top of this file is quite ugly */
/*
 * Alignment decorators for declarations that need 32-byte alignment.
 * ALIGN32_BEG expands to nothing. ALIGN32_END expands to the GNU-style
 * aligned(32) attribute and is written after the declarator, e.g.
 *     int tmp[8] ALIGN32_END = { ... };
 */
46# define ALIGN32_BEG
47# define ALIGN32_END __attribute__((aligned(32)))
48
49/* __m256 and __m256i are ugly to write: short aliases for the 256-bit vector types from <immintrin.h> */
50typedef __m256 v8sf; // vector of 8 float (avx)
51typedef __m256i v8si; // vector of 8 int (avx); the constant macros in this header fill it with eight 32-bit ints
52
53//Added function prototypes
//
// Every prototype carries target("avx2"), so the declarations can be used from
// translation units that are not themselves built with -mavx2. The definitions
// are not in this header (per the notes at the top of the file, the
// implementation lives in a separate source file).
// NOTE(review): callers presumably have to verify AVX2 support at run time
// before calling any of these -- confirm against the call sites.

// Logarithm of the 8 packed floats.
// NOTE(review): presumably the natural log, evaluated per lane -- confirm against the implementation.
54__attribute__((target("avx2")))
55v8sf _mm256_log_ps(v8sf);

// Exponential of the 8 packed floats.
// NOTE(review): unlike its siblings this one does not use the _mm256_*_ps nickname.
56__attribute__((target("avx2")))
57v8sf exp256_ps(v8sf);

// Sine of the 8 packed floats.
// NOTE(review): argument is presumably in radians -- confirm.
58__attribute__((target("avx2")))
59v8sf _mm256_sin_ps(v8sf);

// Cosine of the 8 packed floats.
// NOTE(review): argument is presumably in radians -- confirm.
60__attribute__((target("avx2")))
61v8sf _mm256_cos_ps(v8sf);

// Combined sine/cosine of xx; returns nothing and delivers its results through
// the two pointer arguments.
// NOTE(review): the prototype does not name the outputs; presumably the first
// pointer receives the sine and the second the cosine -- confirm against the
// implementation before relying on the order.
62__attribute__((target("avx2")))
63void _mm256_sincos_ps(v8sf xx, v8sf*, v8sf*);
64
/*
 * Helpers that declare a local AVX constant with the same value in all
 * eight lanes.
 *
 *   _PS256_CONST(Name, Val)            v8sf _ps256_<Name>    = Val as float
 *   _PI32_CONST256(Name, Val)          v8si _pi32_256_<Name> = Val as 32-bit int
 *   _PS256_CONST_TYPE(Name, Type, Val) v8sf _ps256_<Name>    = the 32-bit
 *                                      pattern of (Type)Val reinterpreted as
 *                                      float (for bit masks)
 *
 * Each macro expands to exactly one declaration, evaluates Val once, and
 * creates no helper variables, so the same Name can be used with
 * _PI32_CONST256 and _PS256_CONST_TYPE in one scope without clashing.
 *
 * The broadcast intrinsics are target-specific, so these macros must be
 * expanded inside a function compiled with AVX enabled, e.g. one marked
 * __attribute__((target("avx2"))).
 */

#define _PS256_CONST(Name, Val) \
	v8sf _ps256_##Name = _mm256_set1_ps(Val)

#define _PI32_CONST256(Name, Val) \
	v8si _pi32_256_##Name = _mm256_set1_epi32(Val)

/* The integer pattern is broadcast first and then bit-cast, never value-converted, to float. */
#define _PS256_CONST_TYPE(Name, Type, Val) \
	v8sf _ps256_##Name = _mm256_castsi256_ps(_mm256_set1_epi32((int)(Type)(Val)))
75
76#endif /* __x86_64__ */