tesseract  5.0.0
oclkernels.h
Go to the documentation of this file.
1 // Licensed under the Apache License, Version 2.0 (the "License");
2 // you may not use this file except in compliance with the License.
3 // You may obtain a copy of the License at
4 // http://www.apache.org/licenses/LICENSE-2.0
5 // Unless required by applicable law or agreed to in writing, software
6 // distributed under the License is distributed on an "AS IS" BASIS,
7 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
8 // See the License for the specific language governing permissions and
9 // limitations under the License.
10 
11 #ifndef TESSERACT_OPENCL_OCLKERNELS_H_
12 # define TESSERACT_OPENCL_OCLKERNELS_H_
13 
14 # ifndef USE_EXTERNAL_KERNEL
15 # define KERNEL(...) # __VA_ARGS__ "\n"
16 // Double precision is a default of spreadsheets
17 // cl_khr_fp64: Khronos extension
18 // cl_amd_fp64: AMD extension
19 // use build option outside to define fp_t
21 static const char *kernel_src = KERNEL(
22 \n #ifdef KHR_DP_EXTENSION\n
23 \n #pragma OPENCL EXTENSION cl_khr_fp64
24  : enable\n
25 \n #elif AMD_DP_EXTENSION\n
26 \n #pragma OPENCL EXTENSION cl_amd_fp64
27  : enable\n
28 \n #else \n
29 \n #endif \n
30  __kernel void composeRGBPixel(__global uint *tiffdata, int w, int h, int wpl, __global uint *output) {
31  int i = get_global_id(1);
32  int j = get_global_id(0);
33  int tiffword, rval, gval, bval;
34 
35  //Ignore the excess
36  if ((i >= h) || (j >= w))
37  return;
38 
39  tiffword = tiffdata[i * w + j];
40  rval = ((tiffword)&0xff);
41  gval = (((tiffword) >> 8) & 0xff);
42  bval = (((tiffword) >> 16) & 0xff);
43  output[i * wpl + j] = (rval << (8 * (sizeof(uint) - 1 - 0))) | (gval << (8 * (sizeof(uint) - 1 - 1))) | (bval << (8 * (sizeof(uint) - 1 - 2)));
44  })
45 
46  KERNEL(
47 \n__kernel void pixSubtract_inplace(__global int *dword, __global int *sword, const int wpl, const int h) {
48  const unsigned int row = get_global_id(1);
49  const unsigned int col = get_global_id(0);
50  const unsigned int pos = row * wpl + col;
51 
52  //Ignore the execss
53  if (row >= h || col >= wpl)
54  return;
55 
56  *(dword + pos) &= ~(*(sword + pos));
57  }\n)
58 
59  KERNEL(
60 \n__kernel void morphoDilateHor_5x5(__global int *sword, __global int *dword, const int wpl, const int h) {
61  const unsigned int pos = get_global_id(0);
62  unsigned int prevword, nextword, currword, tempword;
63  unsigned int destword;
64  const int col = pos % wpl;
65 
66  //Ignore the execss
67  if (pos >= (wpl * h))
68  return;
69 
70  currword = *(sword + pos);
71  destword = currword;
72 
73  //Handle boundary conditions
74  if (col == 0)
75  prevword = 0;
76  else
77  prevword = *(sword + pos - 1);
78 
79  if (col == (wpl - 1))
80  nextword = 0;
81  else
82  nextword = *(sword + pos + 1);
83 
84  //Loop unrolled
85 
86  //1 bit to left and 1 bit to right
87  //Get the max value on LHS of every pixel
88  tempword = (prevword << (31)) | ((currword >> 1));
89  destword |= tempword;
90  //Get max value on RHS of every pixel
91  tempword = (currword << 1) | (nextword >> (31));
92  destword |= tempword;
93 
94  //2 bit to left and 2 bit to right
95  //Get the max value on LHS of every pixel
96  tempword = (prevword << (30)) | ((currword >> 2));
97  destword |= tempword;
98  //Get max value on RHS of every pixel
99  tempword = (currword << 2) | (nextword >> (30));
100  destword |= tempword;
101 
102  *(dword + pos) = destword;
103  }\n)
104 
105  KERNEL(
106 \n__kernel void morphoDilateVer_5x5(__global int *sword, __global int *dword, const int wpl, const int h) {
107  const int col = get_global_id(0);
108  const int row = get_global_id(1);
109  const unsigned int pos = row * wpl + col;
110  unsigned int tempword;
111  unsigned int destword;
112  int i;
113 
114  //Ignore the execss
115  if (row >= h || col >= wpl)
116  return;
117 
118  destword = *(sword + pos);
119 
120  //2 words above
121  i = (row - 2) < 0 ? row : (row - 2);
122  tempword = *(sword + i * wpl + col);
123  destword |= tempword;
124 
125  //1 word above
126  i = (row - 1) < 0 ? row : (row - 1);
127  tempword = *(sword + i * wpl + col);
128  destword |= tempword;
129 
130  //1 word below
131  i = (row >= (h - 1)) ? row : (row + 1);
132  tempword = *(sword + i * wpl + col);
133  destword |= tempword;
134 
135  //2 words below
136  i = (row >= (h - 2)) ? row : (row + 2);
137  tempword = *(sword + i * wpl + col);
138  destword |= tempword;
139 
140  *(dword + pos) = destword;
141  }\n)
142 
143  KERNEL(
144 \n__kernel void morphoDilateHor(__global int *sword, __global int *dword, const int xp, const int xn, const int wpl, const int h) {
145  const int col = get_global_id(0);
146  const int row = get_global_id(1);
147  const unsigned int pos = row * wpl + col;
148  unsigned int parbitsxp, parbitsxn, nwords;
149  unsigned int destword, tempword, lastword, currword;
150  unsigned int lnextword, lprevword, rnextword, rprevword, firstword, secondword;
151  int i, j, siter, eiter;
152 
153  //Ignore the execss
154  if (pos >= (wpl * h) || (xn < 1 && xp < 1))
155  return;
156 
157  currword = *(sword + pos);
158  destword = currword;
159 
160  parbitsxp = xp & 31;
161  parbitsxn = xn & 31;
162  nwords = xp >> 5;
163 
164  if (parbitsxp > 0)
165  nwords += 1;
166  else
167  parbitsxp = 31;
168 
169  siter = (col - nwords);
170  eiter = (col + nwords);
171 
172  //Get prev word
173  if (col == 0)
174  firstword = 0x0;
175  else
176  firstword = *(sword + pos - 1);
177 
178  //Get next word
179  if (col == (wpl - 1))
180  secondword = 0x0;
181  else
182  secondword = *(sword + pos + 1);
183 
184  //Last partial bits on either side
185  for (i = 1; i <= parbitsxp; i++) {
186  //Get the max value on LHS of every pixel
187  tempword = ((i == parbitsxp) && (parbitsxp != parbitsxn)) ? 0x0 : (firstword << (32 - i)) | ((currword >> i));
188 
189  destword |= tempword;
190 
191  //Get max value on RHS of every pixel
192  tempword = (currword << i) | (secondword >> (32 - i));
193  destword |= tempword;
194  }
195 
196  //Return if halfwidth <= 1 word
197  if (nwords == 1) {
198  if (xn == 32) {
199  destword |= firstword;
200  }
201  if (xp == 32) {
202  destword |= secondword;
203  }
204 
205  *(dword + pos) = destword;
206  return;
207  }
208 
209  if (siter < 0)
210  firstword = 0x0;
211  else
212  firstword = *(sword + row * wpl + siter);
213 
214  if (eiter >= wpl)
215  lastword = 0x0;
216  else
217  lastword = *(sword + row * wpl + eiter);
218 
219  for (i = 1; i < nwords; i++) {
220  //Gets LHS words
221  if ((siter + i) < 0)
222  secondword = 0x0;
223  else
224  secondword = *(sword + row * wpl + siter + i);
225 
226  lprevword = firstword << (32 - parbitsxn) | secondword >> parbitsxn;
227 
228  firstword = secondword;
229 
230  if ((siter + i + 1) < 0)
231  secondword = 0x0;
232  else
233  secondword = *(sword + row * wpl + siter + i + 1);
234 
235  lnextword = firstword << (32 - parbitsxn) | secondword >> parbitsxn;
236 
237  //Gets RHS words
238  if ((eiter - i) >= wpl)
239  firstword = 0x0;
240  else
241  firstword = *(sword + row * wpl + eiter - i);
242 
243  rnextword = firstword << parbitsxp | lastword >> (32 - parbitsxp);
244 
245  lastword = firstword;
246  if ((eiter - i - 1) >= wpl)
247  firstword = 0x0;
248  else
249  firstword = *(sword + row * wpl + eiter - i - 1);
250 
251  rprevword = firstword << parbitsxp | lastword >> (32 - parbitsxp);
252 
253  for (j = 1; j < 32; j++) {
254  //OR LHS full words
255  tempword = (lprevword << j) | (lnextword >> (32 - j));
256  destword |= tempword;
257 
258  //OR RHS full words
259  tempword = (rprevword << j) | (rnextword >> (32 - j));
260  destword |= tempword;
261  }
262 
263  destword |= lprevword;
264  destword |= lnextword;
265  destword |= rprevword;
266  destword |= rnextword;
267 
268  lastword = firstword;
269  firstword = secondword;
270  }
271 
272  *(dword + pos) = destword;
273  }\n)
274 
275  KERNEL(
276 \n__kernel void morphoDilateHor_32word(__global int *sword, __global int *dword, const int halfwidth, const int wpl, const int h, const char isEven) {
277  const int col = get_global_id(0);
278  const int row = get_global_id(1);
279  const unsigned int pos = row * wpl + col;
280  unsigned int prevword, nextword, currword, tempword;
281  unsigned int destword;
282  int i;
283 
284  //Ignore the execss
285  if (pos >= (wpl * h))
286  return;
287 
288  currword = *(sword + pos);
289  destword = currword;
290 
291  //Handle boundary conditions
292  if (col == 0)
293  prevword = 0;
294  else
295  prevword = *(sword + pos - 1);
296 
297  if (col == (wpl - 1))
298  nextword = 0;
299  else
300  nextword = *(sword + pos + 1);
301 
302  for (i = 1; i <= halfwidth; i++) {
303  //Get the max value on LHS of every pixel
304  if (i == halfwidth && isEven) {
305  tempword = 0x0;
306  } else {
307  tempword = (prevword << (32 - i)) | ((currword >> i));
308  }
309 
310  destword |= tempword;
311 
312  //Get max value on RHS of every pixel
313  tempword = (currword << i) | (nextword >> (32 - i));
314 
315  destword |= tempword;
316  }
317 
318  *(dword + pos) = destword;
319  }\n)
320 
321  KERNEL(
322 \n__kernel void morphoDilateVer(__global int *sword, __global int *dword, const int yp, const int wpl, const int h, const int yn) {
323  const int col = get_global_id(0);
324  const int row = get_global_id(1);
325  const unsigned int pos = row * wpl + col;
326  unsigned int tempword;
327  unsigned int destword;
328  int i, siter, eiter;
329 
330  //Ignore the execss
331  if (row >= h || col >= wpl)
332  return;
333 
334  destword = *(sword + pos);
335 
336  //Set start position and end position considering the boundary conditions
337  siter = (row - yn) < 0 ? 0 : (row - yn);
338  eiter = (row >= (h - yp)) ? (h - 1) : (row + yp);
339 
340  for (i = siter; i <= eiter; i++) {
341  tempword = *(sword + i * wpl + col);
342 
343  destword |= tempword;
344  }
345 
346  *(dword + pos) = destword;
347  }\n)
348 
349  KERNEL(
350 \n__kernel void morphoErodeHor_5x5(__global int *sword, __global int *dword, const int wpl, const int h) {
351  const unsigned int pos = get_global_id(0);
352  unsigned int prevword, nextword, currword, tempword;
353  unsigned int destword;
354  const int col = pos % wpl;
355 
356  //Ignore the execss
357  if (pos >= (wpl * h))
358  return;
359 
360  currword = *(sword + pos);
361  destword = currword;
362 
363  //Handle boundary conditions
364  if (col == 0)
365  prevword = 0xffffffff;
366  else
367  prevword = *(sword + pos - 1);
368 
369  if (col == (wpl - 1))
370  nextword = 0xffffffff;
371  else
372  nextword = *(sword + pos + 1);
373 
374  //Loop unrolled
375 
376  //1 bit to left and 1 bit to right
377  //Get the min value on LHS of every pixel
378  tempword = (prevword << (31)) | ((currword >> 1));
379  destword &= tempword;
380  //Get min value on RHS of every pixel
381  tempword = (currword << 1) | (nextword >> (31));
382  destword &= tempword;
383 
384  //2 bit to left and 2 bit to right
385  //Get the min value on LHS of every pixel
386  tempword = (prevword << (30)) | ((currword >> 2));
387  destword &= tempword;
388  //Get min value on RHS of every pixel
389  tempword = (currword << 2) | (nextword >> (30));
390  destword &= tempword;
391 
392  *(dword + pos) = destword;
393  }\n)
394 
395  KERNEL(
396 \n__kernel void morphoErodeVer_5x5(__global int *sword, __global int *dword, const int wpl, const int h, const int fwmask, const int lwmask) {
397  const int col = get_global_id(0);
398  const int row = get_global_id(1);
399  const unsigned int pos = row * wpl + col;
400  unsigned int tempword;
401  unsigned int destword;
402  int i;
403 
404  //Ignore the execss
405  if (row >= h || col >= wpl)
406  return;
407 
408  destword = *(sword + pos);
409 
410  if (row < 2 || row >= (h - 2)) {
411  destword = 0x0;
412  } else {
413  //2 words above
414  //i = (row - 2) < 0 ? row : (row - 2);
415  i = (row - 2);
416  tempword = *(sword + i * wpl + col);
417  destword &= tempword;
418 
419  //1 word above
420  //i = (row - 1) < 0 ? row : (row - 1);
421  i = (row - 1);
422  tempword = *(sword + i * wpl + col);
423  destword &= tempword;
424 
425  //1 word below
426  //i = (row >= (h - 1)) ? row : (row + 1);
427  i = (row + 1);
428  tempword = *(sword + i * wpl + col);
429  destword &= tempword;
430 
431  //2 words below
432  //i = (row >= (h - 2)) ? row : (row + 2);
433  i = (row + 2);
434  tempword = *(sword + i * wpl + col);
435  destword &= tempword;
436 
437  if (col == 0) {
438  destword &= fwmask;
439  }
440  if (col == (wpl - 1)) {
441  destword &= lwmask;
442  }
443  }
444 
445  *(dword + pos) = destword;
446  }\n)
447 
448  KERNEL(
449 \n__kernel void morphoErodeHor(__global int *sword, __global int *dword, const int xp, const int xn, const int wpl, const int h, const char isAsymmetric, const int rwmask, const int lwmask) {
450  const int col = get_global_id(0);
451  const int row = get_global_id(1);
452  const unsigned int pos = row * wpl + col;
453  unsigned int parbitsxp, parbitsxn, nwords;
454  unsigned int destword, tempword, lastword, currword;
455  unsigned int lnextword, lprevword, rnextword, rprevword, firstword, secondword;
456  int i, j, siter, eiter;
457 
458  //Ignore the execss
459  if (pos >= (wpl * h) || (xn < 1 && xp < 1))
460  return;
461 
462  currword = *(sword + pos);
463  destword = currword;
464 
465  parbitsxp = xp & 31;
466  parbitsxn = xn & 31;
467  nwords = xp >> 5;
468 
469  if (parbitsxp > 0)
470  nwords += 1;
471  else
472  parbitsxp = 31;
473 
474  siter = (col - nwords);
475  eiter = (col + nwords);
476 
477  //Get prev word
478  if (col == 0)
479  firstword = 0xffffffff;
480  else
481  firstword = *(sword + pos - 1);
482 
483  //Get next word
484  if (col == (wpl - 1))
485  secondword = 0xffffffff;
486  else
487  secondword = *(sword + pos + 1);
488 
489  //Last partial bits on either side
490  for (i = 1; i <= parbitsxp; i++) {
491  //Get the max value on LHS of every pixel
492  tempword = (firstword << (32 - i)) | ((currword >> i));
493  destword &= tempword;
494 
495  //Get max value on RHS of every pixel
496  tempword = ((i == parbitsxp) && (parbitsxp != parbitsxn)) ? 0xffffffff : (currword << i) | (secondword >> (32 - i));
497 
498  //tempword = (currword << i) | (secondword >> (32 - i));
499  destword &= tempword;
500  }
501 
502  //Return if halfwidth <= 1 word
503  if (nwords == 1) {
504  if (xp == 32) {
505  destword &= firstword;
506  }
507  if (xn == 32) {
508  destword &= secondword;
509  }
510 
511  //Clear boundary pixels
512  if (isAsymmetric) {
513  if (col == 0)
514  destword &= rwmask;
515  if (col == (wpl - 1))
516  destword &= lwmask;
517  }
518 
519  *(dword + pos) = destword;
520  return;
521  }
522 
523  if (siter < 0)
524  firstword = 0xffffffff;
525  else
526  firstword = *(sword + row * wpl + siter);
527 
528  if (eiter >= wpl)
529  lastword = 0xffffffff;
530  else
531  lastword = *(sword + row * wpl + eiter);
532 
533  for (i = 1; i < nwords; i++) {
534  //Gets LHS words
535  if ((siter + i) < 0)
536  secondword = 0xffffffff;
537  else
538  secondword = *(sword + row * wpl + siter + i);
539 
540  lprevword = firstword << (32 - parbitsxp) | secondword >> (parbitsxp);
541 
542  firstword = secondword;
543 
544  if ((siter + i + 1) < 0)
545  secondword = 0xffffffff;
546  else
547  secondword = *(sword + row * wpl + siter + i + 1);
548 
549  lnextword = firstword << (32 - parbitsxp) | secondword >> (parbitsxp);
550 
551  //Gets RHS words
552  if ((eiter - i) >= wpl)
553  firstword = 0xffffffff;
554  else
555  firstword = *(sword + row * wpl + eiter - i);
556 
557  rnextword = firstword << parbitsxn | lastword >> (32 - parbitsxn);
558 
559  lastword = firstword;
560  if ((eiter - i - 1) >= wpl)
561  firstword = 0xffffffff;
562  else
563  firstword = *(sword + row * wpl + eiter - i - 1);
564 
565  rprevword = firstword << parbitsxn | lastword >> (32 - parbitsxn);
566 
567  for (j = 0; j < 32; j++) {
568  //OR LHS full words
569  tempword = (lprevword << j) | (lnextword >> (32 - j));
570  destword &= tempword;
571 
572  //OR RHS full words
573  tempword = (rprevword << j) | (rnextword >> (32 - j));
574  destword &= tempword;
575  }
576 
577  destword &= lprevword;
578  destword &= lnextword;
579  destword &= rprevword;
580  destword &= rnextword;
581 
582  lastword = firstword;
583  firstword = secondword;
584  }
585 
586  if (isAsymmetric) {
587  //Clear boundary pixels
588  if (col < (nwords - 1))
589  destword = 0x0;
590  else if (col == (nwords - 1))
591  destword &= rwmask;
592  else if (col > (wpl - nwords))
593  destword = 0x0;
594  else if (col == (wpl - nwords))
595  destword &= lwmask;
596  }
597 
598  *(dword + pos) = destword;
599  }\n)
600 
601  KERNEL(
602 \n__kernel void morphoErodeHor_32word(__global int *sword, __global int *dword, const int halfwidth, const int wpl, const int h, const char clearBoundPixH, const int rwmask, const int lwmask, const char isEven) {
603  const int col = get_global_id(0);
604  const int row = get_global_id(1);
605  const unsigned int pos = row * wpl + col;
606  unsigned int prevword, nextword, currword, tempword, destword;
607  int i;
608 
609  //Ignore the execss
610  if (pos >= (wpl * h))
611  return;
612 
613  currword = *(sword + pos);
614  destword = currword;
615 
616  //Handle boundary conditions
617  if (col == 0)
618  prevword = 0xffffffff;
619  else
620  prevword = *(sword + pos - 1);
621 
622  if (col == (wpl - 1))
623  nextword = 0xffffffff;
624  else
625  nextword = *(sword + pos + 1);
626 
627  for (i = 1; i <= halfwidth; i++) {
628  //Get the min value on LHS of every pixel
629  tempword = (prevword << (32 - i)) | ((currword >> i));
630 
631  destword &= tempword;
632 
633  //Get min value on RHS of every pixel
634  if (i == halfwidth && isEven) {
635  tempword = 0xffffffff;
636  } else {
637  tempword = (currword << i) | (nextword >> (32 - i));
638  }
639 
640  destword &= tempword;
641  }
642 
643  if (clearBoundPixH) {
644  if (col == 0) {
645  destword &= rwmask;
646  } else if (col == (wpl - 1)) {
647  destword &= lwmask;
648  }
649  }
650 
651  *(dword + pos) = destword;
652  }\n)
653 
654  KERNEL(
655 \n__kernel void morphoErodeVer(__global int *sword, __global int *dword, const int yp, const int wpl, const int h, const char clearBoundPixV, const int yn) {
656  const int col = get_global_id(0);
657  const int row = get_global_id(1);
658  const unsigned int pos = row * wpl + col;
659  unsigned int tempword, destword;
660  int i, siter, eiter;
661 
662  //Ignore the execss
663  if (row >= h || col >= wpl)
664  return;
665 
666  destword = *(sword + pos);
667 
668  //Set start position and end position considering the boundary conditions
669  siter = (row - yp) < 0 ? 0 : (row - yp);
670  eiter = (row >= (h - yn)) ? (h - 1) : (row + yn);
671 
672  for (i = siter; i <= eiter; i++) {
673  tempword = *(sword + i * wpl + col);
674 
675  destword &= tempword;
676  }
677 
678  //Clear boundary pixels
679  if (clearBoundPixV && ((row < yp) || ((h - row) <= yn))) {
680  destword = 0x0;
681  }
682 
683  *(dword + pos) = destword;
684  }\n)
685 
686  // HistogramRect Kernel: Accumulate
687  // assumes 4 channels, i.e., bytes_per_pixel = 4
688  // assumes number of pixels is multiple of 8
689  // data is laid out as
690  // ch0 ch1 ...
691  // bin0 bin1 bin2... bin0...
692  // rpt0,1,2...256 rpt0,1,2...
693  KERNEL(
694 \n #define HIST_REDUNDANCY 256\n
695 \n #define GROUP_SIZE 256\n
696 \n #define HIST_SIZE 256\n
697 \n #define NUM_CHANNELS 4\n
698 \n #define HR_UNROLL_SIZE 8 \n
699 \n #define HR_UNROLL_TYPE uchar8 \n
700 
701  __attribute__((reqd_work_group_size(256, 1, 1))) __kernel void kernel_HistogramRectAllChannels(__global const uchar8 *data, uint numPixels, __global uint *histBuffer) {
702  // declare variables
703  uchar8 pixels;
704  int threadOffset = get_global_id(0) % HIST_REDUNDANCY;
705 
706  // for each pixel/channel, accumulate in global memory
707  for (uint pc = get_global_id(0); pc < numPixels * NUM_CHANNELS / HR_UNROLL_SIZE; pc += get_global_size(0)) {
708  pixels = data[pc];
709  // channel bin thread
710  atomic_inc(&histBuffer[0 * HIST_SIZE * HIST_REDUNDANCY + pixels.s0 * HIST_REDUNDANCY + threadOffset]); // ch0
711  atomic_inc(&histBuffer[0 * HIST_SIZE * HIST_REDUNDANCY + pixels.s4 * HIST_REDUNDANCY + threadOffset]); // ch0
712  atomic_inc(&histBuffer[1 * HIST_SIZE * HIST_REDUNDANCY + pixels.s1 * HIST_REDUNDANCY + threadOffset]); // ch1
713  atomic_inc(&histBuffer[1 * HIST_SIZE * HIST_REDUNDANCY + pixels.s5 * HIST_REDUNDANCY + threadOffset]); // ch1
714  atomic_inc(&histBuffer[2 * HIST_SIZE * HIST_REDUNDANCY + pixels.s2 * HIST_REDUNDANCY + threadOffset]); // ch2
715  atomic_inc(&histBuffer[2 * HIST_SIZE * HIST_REDUNDANCY + pixels.s6 * HIST_REDUNDANCY + threadOffset]); // ch2
716  atomic_inc(&histBuffer[3 * HIST_SIZE * HIST_REDUNDANCY + pixels.s3 * HIST_REDUNDANCY + threadOffset]); // ch3
717  atomic_inc(&histBuffer[3 * HIST_SIZE * HIST_REDUNDANCY + pixels.s7 * HIST_REDUNDANCY + threadOffset]); // ch3
718  }
719  })
720 
721  KERNEL(
722  // NUM_CHANNELS = 1
723  __attribute__((reqd_work_group_size(256, 1, 1))) __kernel void kernel_HistogramRectOneChannel(__global const uchar8 *data, uint numPixels, __global uint *histBuffer) {
724  // declare variables
725  uchar8 pixels;
726  int threadOffset = get_global_id(0) % HIST_REDUNDANCY;
727 
728  // for each pixel/channel, accumulate in global memory
729  for (uint pc = get_global_id(0); pc < numPixels / HR_UNROLL_SIZE; pc += get_global_size(0)) {
730  pixels = data[pc];
731  // bin thread
732  atomic_inc(&histBuffer[pixels.s0 * HIST_REDUNDANCY + threadOffset]);
733  atomic_inc(&histBuffer[pixels.s1 * HIST_REDUNDANCY + threadOffset]);
734  atomic_inc(&histBuffer[pixels.s2 * HIST_REDUNDANCY + threadOffset]);
735  atomic_inc(&histBuffer[pixels.s3 * HIST_REDUNDANCY + threadOffset]);
736  atomic_inc(&histBuffer[pixels.s4 * HIST_REDUNDANCY + threadOffset]);
737  atomic_inc(&histBuffer[pixels.s5 * HIST_REDUNDANCY + threadOffset]);
738  atomic_inc(&histBuffer[pixels.s6 * HIST_REDUNDANCY + threadOffset]);
739  atomic_inc(&histBuffer[pixels.s7 * HIST_REDUNDANCY + threadOffset]);
740  }
741  })
742 
743  // HistogramRect Kernel: Reduction
744  // only supports 4 channels
745  // each work group handles a single channel of a single histogram bin
746  KERNEL(__attribute__((reqd_work_group_size(256, 1, 1))) __kernel void kernel_HistogramRectAllChannelsReduction(int n, // unused pixel redundancy
747  __global uint *histBuffer, __global int *histResult) {
748  // declare variables
749  int channel = get_group_id(0) / HIST_SIZE;
750  int bin = get_group_id(0) % HIST_SIZE;
751  int value = 0;
752 
753  // accumulate in register
754  for (uint i = get_local_id(0); i < HIST_REDUNDANCY; i += GROUP_SIZE) {
755  value += histBuffer[channel * HIST_SIZE * HIST_REDUNDANCY + bin * HIST_REDUNDANCY + i];
756  }
757 
758  // reduction in local memory
759  __local int localHist[GROUP_SIZE];
760  localHist[get_local_id(0)] = value;
761  barrier(CLK_LOCAL_MEM_FENCE);
762  for (int stride = GROUP_SIZE / 2; stride >= 1; stride /= 2) {
763  if (get_local_id(0) < stride) {
764  value = localHist[get_local_id(0) + stride];
765  }
766  barrier(CLK_LOCAL_MEM_FENCE);
767  if (get_local_id(0) < stride) {
768  localHist[get_local_id(0)] += value;
769  }
770  barrier(CLK_LOCAL_MEM_FENCE);
771  }
772 
773  // write reduction to final result
774  if (get_local_id(0) == 0) {
775  histResult[get_group_id(0)] = localHist[0];
776  }
777  } // kernel_HistogramRectAllChannels
778  )
779 
780  KERNEL(
781  // NUM_CHANNELS = 1
782  __attribute__((reqd_work_group_size(256, 1, 1))) __kernel void kernel_HistogramRectOneChannelReduction(int n, // unused pixel redundancy
783  __global uint *histBuffer, __global int *histResult) {
784  // declare variables
785  // int channel = get_group_id(0)/HIST_SIZE;
786  int bin = get_group_id(0) % HIST_SIZE;
787  int value = 0;
788 
789  // accumulate in register
790  for (int i = get_local_id(0); i < HIST_REDUNDANCY; i += GROUP_SIZE) {
791  value += histBuffer[bin * HIST_REDUNDANCY + i];
792  }
793 
794  // reduction in local memory
795  __local int localHist[GROUP_SIZE];
796  localHist[get_local_id(0)] = value;
797  barrier(CLK_LOCAL_MEM_FENCE);
798  for (int stride = GROUP_SIZE / 2; stride >= 1; stride /= 2) {
799  if (get_local_id(0) < stride) {
800  value = localHist[get_local_id(0) + stride];
801  }
802  barrier(CLK_LOCAL_MEM_FENCE);
803  if (get_local_id(0) < stride) {
804  localHist[get_local_id(0)] += value;
805  }
806  barrier(CLK_LOCAL_MEM_FENCE);
807  }
808 
809  // write reduction to final result
810  if (get_local_id(0) == 0) {
811  histResult[get_group_id(0)] = localHist[0];
812  }
813  } // kernel_HistogramRectOneChannelReduction
814  )
815 
816  // ThresholdRectToPix Kernel
817  // only supports 4 channels
818  // imageData is input image (24-bits/pixel)
819  // pix is output image (1-bit/pixel)
820  KERNEL(
821 \n #define CHAR_VEC_WIDTH 4 \n
822 \n #define PIXELS_PER_WORD 32 \n
823 \n #define PIXELS_PER_BURST 8 \n
824 \n #define BURSTS_PER_WORD(PIXELS_PER_WORD / PIXELS_PER_BURST) \n typedef union {
825  uchar s[PIXELS_PER_BURST * NUM_CHANNELS];
826  uchar4 v[(PIXELS_PER_BURST * NUM_CHANNELS) / CHAR_VEC_WIDTH];
827  } charVec;
828 
829  __attribute__((reqd_work_group_size(256, 1, 1))) __kernel void kernel_ThresholdRectToPix(__global const uchar4 *imageData, int height, int width,
830  int wpl, // words per line
831  __global int *thresholds, __global int *hi_values, __global int *pix) {
832  // declare variables
833  int pThresholds[NUM_CHANNELS];
834  int pHi_Values[NUM_CHANNELS];
835  for (int i = 0; i < NUM_CHANNELS; i++) {
836  pThresholds[i] = thresholds[i];
837  pHi_Values[i] = hi_values[i];
838  }
839 
840  // for each word (32 pixels) in output image
841  for (uint w = get_global_id(0); w < wpl * height; w += get_global_size(0)) {
842  unsigned int word = 0; // all bits start at zero
843  // for each burst in word
844  for (int b = 0; b < BURSTS_PER_WORD; b++) {
845  // load burst
846  charVec pixels;
847  int offset = (w / wpl) * width;
848  offset += (w % wpl) * PIXELS_PER_WORD;
849  offset += b * PIXELS_PER_BURST;
850 
851  for (int i = 0; i < PIXELS_PER_BURST; ++i)
852  pixels.v[i] = imageData[offset + i];
853 
854  // for each pixel in burst
855  for (int p = 0; p < PIXELS_PER_BURST; p++) {
856  for (int c = 0; c < NUM_CHANNELS; c++) {
857  unsigned char pixChan = pixels.s[p * NUM_CHANNELS + c];
858  if (pHi_Values[c] >= 0 && (pixChan > pThresholds[c]) == (pHi_Values[c] == 0)) {
859  const uint kTopBit = 0x80000000;
860  word |= (kTopBit >> ((b * PIXELS_PER_BURST + p) & 31));
861  }
862  }
863  }
864  }
865  pix[w] = word;
866  }
867  }
868 
869 \n #define CHAR_VEC_WIDTH 8 \n
870 \n #define PIXELS_PER_WORD 32 \n
871 \n #define PIXELS_PER_BURST 8 \n
872 \n #define BURSTS_PER_WORD(PIXELS_PER_WORD / PIXELS_PER_BURST) \n typedef union {
873  uchar s[PIXELS_PER_BURST * 1];
874  uchar8 v[(PIXELS_PER_BURST * 1) / CHAR_VEC_WIDTH];
875  } charVec1;
876 
877  __attribute__((reqd_work_group_size(256, 1, 1))) __kernel void kernel_ThresholdRectToPix_OneChan(__global const uchar8 *imageData, int height, int width,
878  int wpl, // words per line of output image
879  __global int *thresholds, __global int *hi_values, __global int *pix) {
880  // declare variables
881  int pThresholds[1];
882  int pHi_Values[1];
883  for (int i = 0; i < 1; i++) {
884  pThresholds[i] = thresholds[i];
885  pHi_Values[i] = hi_values[i];
886  }
887 
888  // for each word (32 pixels) in output image
889  for (uint w = get_global_id(0); w < wpl * height; w += get_global_size(0)) {
890  unsigned int word = 0; // all bits start at zero
891 
892  // for each burst in word
893  for (int b = 0; b < BURSTS_PER_WORD; b++) {
894  // load burst
895  charVec1 pixels;
896  // for each char8 in burst
897  pixels.v[0] = imageData[w * BURSTS_PER_WORD + b + 0];
898 
899  // for each pixel in burst
900  for (int p = 0; p < PIXELS_PER_BURST; p++) {
901  //int littleEndianIdx = p ^ 3;
902  //int bigEndianIdx = p;
903  int idx =
904 \n #ifdef __ENDIAN_LITTLE__\n
905  p ^
906  3;
907  \n #else \n
908  p;
909  \n #endif \n unsigned char pixChan = pixels.s[idx];
910  if (pHi_Values[0] >= 0 && (pixChan > pThresholds[0]) == (pHi_Values[0] == 0)) {
911  const uint kTopBit = 0x80000000;
912  word |= (kTopBit >> ((b * PIXELS_PER_BURST + p) & 31));
913  }
914  }
915  }
916  pix[w] = word;
917  }
918  })
919 
920  ; // close char*
921 
922 # endif // USE_EXTERNAL_KERNEL
923 #endif // TESSERACT_OPENCL_OCLKERNELS_H_
924 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
#define __attribute__(attr)
Definition: errcode.h:41
#define KERNEL(...)
Definition: oclkernels.h:15
unsigned char uchar
Definition: utfdef.h:8
unsigned int uint
Definition: utfdef.h:10