tesseract  5.0.0
dotproductneon.cpp
Go to the documentation of this file.
1 // File: dotproductneon.cpp
3 // Description: Dot product function for ARM NEON.
4 // Author: Stefan Weil
5 //
6 // Licensed under the Apache License, Version 2.0 (the "License");
7 // you may not use this file except in compliance with the License.
8 // You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
16 
17 #if defined(__ARM_NEON)
18 
19 #include <arm_neon.h>
20 #include "dotproduct.h"
21 
22 namespace tesseract {
23 
24 // Documentation:
25 // https://developer.arm.com/architectures/instruction-sets/intrinsics/
26 
27 #if defined(FAST_FLOAT) && defined(__ARM_ARCH_ISA_A64)
28 
// Computes and returns the single-precision dot product of the two
// n-vectors u and v using AArch64 NEON intrinsics.
// Eight element pairs are consumed per vector iteration through two
// independent 4-lane fused multiply-add accumulators; the remaining
// (fewer than eight) elements are handled by a scalar loop.
// Returns 0 when n <= 0.
float DotProductNEON(const float *u, const float *v, int n) {
  float32x4_t acc_lo = vdupq_n_f32(0.0f);
  float32x4_t acc_hi = vdupq_n_f32(0.0f);
  // Vector part: lanes 0..3 go to acc_lo, lanes 4..7 go to acc_hi.
  for (; n >= 8; n -= 8, u += 8, v += 8) {
    const float32x4_t u_lo = vld1q_f32(u);
    const float32x4_t v_lo = vld1q_f32(v);
    const float32x4_t u_hi = vld1q_f32(u + 4);
    const float32x4_t v_hi = vld1q_f32(v + 4);
    acc_lo = vfmaq_f32(acc_lo, u_lo, v_lo);
    acc_hi = vfmaq_f32(acc_hi, u_hi, v_hi);
  }
  // Collapse each accumulator to a scalar (low half first, then high half).
  float sum = vaddvq_f32(acc_lo);
  sum += vaddvq_f32(acc_hi);
  // Scalar tail for the last n (< 8) element pairs.
  for (int i = 0; i < n; ++i) {
    sum += u[i] * v[i];
  }
  return sum;
}
52 
53 #else
54 
55 // Computes and returns the dot product of the two n-vectors u and v.
56 TFloat DotProductNEON(const TFloat *u, const TFloat *v, int n) {
57  TFloat total = 0;
58 #if defined(OPENMP_SIMD) || defined(_OPENMP)
59 #pragma omp simd reduction(+:total)
60 #endif
61  for (int k = 0; k < n; k++) {
62  total += u[k] * v[k];
63  }
64  return total;
65 }
66 
67 #endif
68 
69 } // namespace tesseract
70 
71 #endif /* __ARM_NEON */
TFloat DotProductNEON(const TFloat *u, const TFloat *v, int n)
double TFloat
Definition: tesstypes.h:39