tesseract  5.0.0
linefind.cpp
Go to the documentation of this file.
1 // File: linefind.cpp
3 // Description: Class to find vertical lines in an image and create
4 // a corresponding list of empty blobs.
5 // Author: Ray Smith
6 //
7 // (C) Copyright 2008, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 #ifdef HAVE_CONFIG_H
21 # include "config_auto.h"
22 #endif
23 
24 #include "alignedblob.h"
25 #include "blobbox.h"
26 #include "crakedge.h" // for CRACKEDGE
27 #include "edgblob.h"
28 #include "linefind.h"
29 #include "tabvector.h"
30 #if defined(USE_OPENCL)
31 # include "openclwrapper.h" // for OpenclDevice
32 #endif
33 
34 #include <algorithm>
35 
36 namespace tesseract {
37 
/// Denominator of resolution makes max pixel width to allow thin lines.
constexpr int kThinLineFraction = 20;
/// Denominator of resolution makes min pixels to demand line lengths to be.
constexpr int kMinLineLengthFraction = 4;
/// Spacing of cracks across the page to break up tall vertical lines.
constexpr int kCrackSpacing = 100;
/// Grid size used by line finder. Not very critical.
constexpr int kLineFindGridSize = 50;
// Lines at least this many pixels wide are treated as "thick".
constexpr int kMinThickLineWidth = 12;
// Largest allowed line residue: pixels that belong to a line but fail the
// long thin opening and so never reach the candidate line mask.
constexpr int kMaxLineResidue = 6;
// Min length, in inches, of a line segment thicker than kMinThickLineWidth.
// (Lines that thick should not be broken up by simple image degradation.)
constexpr double kThickLengthMultiple = 0.75;
// Largest fraction of a line's box area that non-line pixels may occupy.
constexpr double kMaxNonLineDensity = 0.25;
// Max height of a music stave in inches.
constexpr double kMaxStaveHeight = 1.0;
// Smallest fraction of the pixels in a music rectangle that must be
// connected to the staves.
constexpr double kMinMusicPixelFraction = 0.75;
61 
62 // Erases the unused blobs from the line_pix image, taking into account
63 // whether this was a horizontal or vertical line set.
64 static void RemoveUnusedLineSegments(bool horizontal_lines, BLOBNBOX_LIST *line_bblobs,
65  Image line_pix) {
66  int height = pixGetHeight(line_pix);
67  BLOBNBOX_IT bbox_it(line_bblobs);
68  for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) {
69  BLOBNBOX *blob = bbox_it.data();
70  if (blob->left_tab_type() != TT_VLINE) {
71  const TBOX &box = blob->bounding_box();
72  Box *pixbox = nullptr;
73  if (horizontal_lines) {
74  // Horizontal lines are in tess format and also have x and y flipped
75  // (to use FindVerticalAlignment) so we have to flip x and y and then
76  // convert to Leptonica by height - flipped x (ie the right edge).
77  // See GetLineBoxes for more explanation.
78  pixbox = boxCreate(box.bottom(), height - box.right(), box.height(), box.width());
79  } else {
80  // For vertical lines, just flip upside-down to convert to Leptonica.
81  // The y position of the box in Leptonica terms is the distance from
82  // the top of the image to the top of the box.
83  pixbox = boxCreate(box.left(), height - box.top(), box.width(), box.height());
84  }
85  pixClearInRect(line_pix, pixbox);
86  boxDestroy(&pixbox);
87  }
88  }
89 }
90 
91 // Helper subtracts the line_pix image from the src_pix, and removes residue
92 // as well by removing components that touch the line, but are not in the
93 // non_line_pix mask. It is assumed that the non_line_pix mask has already
94 // been prepared to required accuracy.
95 static void SubtractLinesAndResidue(Image line_pix, Image non_line_pix, int resolution,
96  Image src_pix) {
97  // First remove the lines themselves.
98  pixSubtract(src_pix, src_pix, line_pix);
99  // Subtract the non-lines from the image to get the residue.
100  Image residue_pix = pixSubtract(nullptr, src_pix, non_line_pix);
101  // Dilate the lines so they touch the residue.
102  Image fat_line_pix = pixDilateBrick(nullptr, line_pix, 3, 3);
103  // Seed fill the fat lines to get all the residue.
104  pixSeedfillBinary(fat_line_pix, fat_line_pix, residue_pix, 8);
105  // Subtract the residue from the original image.
106  pixSubtract(src_pix, src_pix, fat_line_pix);
107  fat_line_pix.destroy();
108  residue_pix.destroy();
109 }
110 
111 // Returns the maximum strokewidth in the given binary image by doubling
112 // the maximum of the distance function.
113 static int MaxStrokeWidth(Image pix) {
114  Image dist_pix = pixDistanceFunction(pix, 4, 8, L_BOUNDARY_BG);
115  int width = pixGetWidth(dist_pix);
116  int height = pixGetHeight(dist_pix);
117  int wpl = pixGetWpl(dist_pix);
118  l_uint32 *data = pixGetData(dist_pix);
119  // Find the maximum value in the distance image.
120  int max_dist = 0;
121  for (int y = 0; y < height; ++y) {
122  for (int x = 0; x < width; ++x) {
123  int pixel = GET_DATA_BYTE(data, x);
124  if (pixel > max_dist) {
125  max_dist = pixel;
126  }
127  }
128  data += wpl;
129  }
130  dist_pix.destroy();
131  return max_dist * 2;
132 }
133 
134 // Returns the number of components in the intersection_pix touched by line_box.
135 static int NumTouchingIntersections(Box *line_box, Image intersection_pix) {
136  if (intersection_pix == nullptr) {
137  return 0;
138  }
139  Image rect_pix = pixClipRectangle(intersection_pix, line_box, nullptr);
140  Boxa *boxa = pixConnComp(rect_pix, nullptr, 8);
141  rect_pix.destroy();
142  if (boxa == nullptr) {
143  return false;
144  }
145  int result = boxaGetCount(boxa);
146  boxaDestroy(&boxa);
147  return result;
148 }
149 
150 // Returns the number of black pixels found in the box made by adding the line
151 // width to both sides of the line bounding box. (Increasing the smallest
152 // dimension of the bounding box.)
153 static int CountPixelsAdjacentToLine(int line_width, Box *line_box, Image nonline_pix) {
154  l_int32 x, y, box_width, box_height;
155  boxGetGeometry(line_box, &x, &y, &box_width, &box_height);
156  if (box_width > box_height) {
157  // horizontal line.
158  int bottom = std::min(pixGetHeight(nonline_pix), y + box_height + line_width);
159  y = std::max(0, y - line_width);
160  box_height = bottom - y;
161  } else {
162  // Vertical line.
163  int right = std::min(pixGetWidth(nonline_pix), x + box_width + line_width);
164  x = std::max(0, x - line_width);
165  box_width = right - x;
166  }
167  Box *box = boxCreate(x, y, box_width, box_height);
168  Image rect_pix = pixClipRectangle(nonline_pix, box, nullptr);
169  boxDestroy(&box);
170  l_int32 result;
171  pixCountPixels(rect_pix, &result, nullptr);
172  rect_pix.destroy();
173  return result;
174 }
175 
176 // Helper erases false-positive line segments from the input/output line_pix.
177 // 1. Since thick lines shouldn't really break up, we can eliminate some false
178 // positives by marking segments that are at least kMinThickLineWidth
179 // thickness, yet have a length less than min_thick_length.
180 // 2. Lines that don't have at least 2 intersections with other lines and have
181 // a lot of neighbouring non-lines are probably not lines (perhaps arabic
182 // or Hindi words, or underlines.)
183 // Bad line components are erased from line_pix.
184 // Returns the number of remaining connected components.
185 static int FilterFalsePositives(int resolution, Image nonline_pix, Image intersection_pix,
186  Image line_pix) {
187  int min_thick_length = static_cast<int>(resolution * kThickLengthMultiple);
188  Pixa *pixa = nullptr;
189  Boxa *boxa = pixConnComp(line_pix, &pixa, 8);
190  // Iterate over the boxes to remove false positives.
191  int nboxes = boxaGetCount(boxa);
192  int remaining_boxes = nboxes;
193  for (int i = 0; i < nboxes; ++i) {
194  Box *box = boxaGetBox(boxa, i, L_CLONE);
195  l_int32 x, y, box_width, box_height;
196  boxGetGeometry(box, &x, &y, &box_width, &box_height);
197  Image comp_pix = pixaGetPix(pixa, i, L_CLONE);
198  int max_width = MaxStrokeWidth(comp_pix);
199  comp_pix.destroy();
200  bool bad_line = false;
201  // If the length is too short to stand-alone as a line, and the box width
202  // is thick enough, and the stroke width is thick enough it is bad.
203  if (box_width >= kMinThickLineWidth && box_height >= kMinThickLineWidth &&
204  box_width < min_thick_length && box_height < min_thick_length &&
205  max_width > kMinThickLineWidth) {
206  // Too thick for the length.
207  bad_line = true;
208  }
209  if (!bad_line && (NumTouchingIntersections(box, intersection_pix) < 2)) {
210  // Test non-line density near the line.
211  int nonline_count = CountPixelsAdjacentToLine(max_width, box, nonline_pix);
212  if (nonline_count > box_height * box_width * kMaxNonLineDensity) {
213  bad_line = true;
214  }
215  }
216  if (bad_line) {
217  // Not a good line.
218  pixClearInRect(line_pix, box);
219  --remaining_boxes;
220  }
221  boxDestroy(&box);
222  }
223  pixaDestroy(&pixa);
224  boxaDestroy(&boxa);
225  return remaining_boxes;
226 }
227 
228 // Finds vertical and horizontal line objects in the given pix.
229 // Uses the given resolution to determine size thresholds instead of any
230 // that may be present in the pix.
231 // The output vertical_x and vertical_y contain a sum of the output vectors,
232 // thereby giving the mean vertical direction.
233 // If pix_music_mask != nullptr, and music is detected, a mask of the staves
234 // and anything that is connected (bars, notes etc.) will be returned in
235 // pix_music_mask, the mask subtracted from pix, and the lines will not
236 // appear in v_lines or h_lines.
237 // The output vectors are owned by the list and Frozen (cannot refit) by
238 // having no boxes, as there is no need to refit or merge separator lines.
239 // The detected lines are removed from the pix.
240 void LineFinder::FindAndRemoveLines(int resolution, bool debug, Image pix, int *vertical_x,
241  int *vertical_y, Image *pix_music_mask, TabVector_LIST *v_lines,
242  TabVector_LIST *h_lines) {
243  if (pix == nullptr || vertical_x == nullptr || vertical_y == nullptr) {
244  tprintf("Error in parameters for LineFinder::FindAndRemoveLines\n");
245  return;
246  }
247  Image pix_vline = nullptr;
248  Image pix_non_vline = nullptr;
249  Image pix_hline = nullptr;
250  Image pix_non_hline = nullptr;
251  Image pix_intersections = nullptr;
252  Pixa *pixa_display = debug ? pixaCreate(0) : nullptr;
253  GetLineMasks(resolution, pix, &pix_vline, &pix_non_vline, &pix_hline, &pix_non_hline,
254  &pix_intersections, pix_music_mask, pixa_display);
255  // Find lines, convert to TabVector_LIST and remove those that are used.
256  FindAndRemoveVLines(resolution, pix_intersections, vertical_x, vertical_y, &pix_vline,
257  pix_non_vline, pix, v_lines);
258  pix_intersections.destroy();
259  if (pix_hline != nullptr) {
260  // Recompute intersections and re-filter false positive h-lines.
261  if (pix_vline != nullptr) {
262  pix_intersections = pix_vline & pix_hline;
263  }
264  if (!FilterFalsePositives(resolution, pix_non_hline, pix_intersections, pix_hline)) {
265  pix_hline.destroy();
266  }
267  }
268  FindAndRemoveHLines(resolution, pix_intersections, *vertical_x, *vertical_y, &pix_hline,
269  pix_non_hline, pix, h_lines);
270  if (pixa_display != nullptr && pix_vline != nullptr) {
271  pixaAddPix(pixa_display, pix_vline, L_CLONE);
272  }
273  if (pixa_display != nullptr && pix_hline != nullptr) {
274  pixaAddPix(pixa_display, pix_hline, L_CLONE);
275  }
276  pix_intersections.destroy();
277  if (pix_vline != nullptr && pix_hline != nullptr) {
278  // Remove joins (intersections) where lines cross, and the residue.
279  // Recalculate the intersections, since some lines have been deleted.
280  pix_intersections = pix_vline & pix_hline;
281  // Fatten up the intersections and seed-fill to get the intersection
282  // residue.
283  Image pix_join_residue = pixDilateBrick(nullptr, pix_intersections, 5, 5);
284  pixSeedfillBinary(pix_join_residue, pix_join_residue, pix, 8);
285  // Now remove the intersection residue.
286  pixSubtract(pix, pix, pix_join_residue);
287  pix_join_residue.destroy();
288  }
289  // Remove any detected music.
290  if (pix_music_mask != nullptr && *pix_music_mask != nullptr) {
291  if (pixa_display != nullptr) {
292  pixaAddPix(pixa_display, *pix_music_mask, L_CLONE);
293  }
294  pixSubtract(pix, pix, *pix_music_mask);
295  }
296  if (pixa_display != nullptr) {
297  pixaAddPix(pixa_display, pix, L_CLONE);
298  }
299 
300  pix_vline.destroy();
301  pix_non_vline.destroy();
302  pix_hline.destroy();
303  pix_non_hline.destroy();
304  pix_intersections.destroy();
305  if (pixa_display != nullptr) {
306  pixaConvertToPdf(pixa_display, resolution, 1.0f, 0, 0, "LineFinding", "vhlinefinding.pdf");
307  pixaDestroy(&pixa_display);
308  }
309 }
310 
311 // Converts the Boxa array to a list of C_BLOB, getting rid of severely
312 // overlapping outlines and those that are children of a bigger one.
313 // The output is a list of C_BLOBs that are owned by the list.
314 // The C_OUTLINEs in the C_BLOBs contain no outline data - just empty
315 // bounding boxes. The Boxa is consumed and destroyed.
316 void LineFinder::ConvertBoxaToBlobs(int image_width, int image_height, Boxa **boxes,
317  C_BLOB_LIST *blobs) {
318  C_OUTLINE_LIST outlines;
319  C_OUTLINE_IT ol_it = &outlines;
320  // Iterate the boxes to convert to outlines.
321  int nboxes = boxaGetCount(*boxes);
322  for (int i = 0; i < nboxes; ++i) {
323  l_int32 x, y, width, height;
324  boxaGetBoxGeometry(*boxes, i, &x, &y, &width, &height);
325  // Make a C_OUTLINE from the leptonica box. This is a bit of a hack,
326  // as there is no outline, just a bounding box, but with some very
327  // small changes to coutln.cpp, it works nicely.
328  ICOORD top_left(x, y);
329  ICOORD bot_right(x + width, y + height);
330  CRACKEDGE startpt;
331  startpt.pos = top_left;
332  auto *outline = new C_OUTLINE(&startpt, top_left, bot_right, 0);
333  ol_it.add_after_then_move(outline);
334  }
335  // Use outlines_to_blobs to convert the outlines to blobs and find
336  // overlapping and contained objects. The output list of blobs in the block
337  // has all the bad ones filtered out and deleted.
338  BLOCK block;
339  ICOORD page_tl(0, 0);
340  ICOORD page_br(image_width, image_height);
341  outlines_to_blobs(&block, page_tl, page_br, &outlines);
342  // Transfer the created blobs to the output list.
343  C_BLOB_IT blob_it(blobs);
344  blob_it.add_list_after(block.blob_list());
345  // The boxes aren't needed any more.
346  boxaDestroy(boxes);
347 }
348 
349 // Finds vertical line objects in pix_vline and removes the from src_pix.
350 // Uses the given resolution to determine size thresholds instead of any
351 // that may be present in the pix.
352 // The output vertical_x and vertical_y contain a sum of the output vectors,
353 // thereby giving the mean vertical direction.
354 // The output vectors are owned by the list and Frozen (cannot refit) by
355 // having no boxes, as there is no need to refit or merge separator lines.
356 // If no good lines are found, pix_vline is destroyed.
357 // None of the input pointers may be nullptr, and if *pix_vline is nullptr then
358 // the function does nothing.
359 void LineFinder::FindAndRemoveVLines(int resolution, Image pix_intersections, int *vertical_x,
360  int *vertical_y, Image *pix_vline, Image pix_non_vline,
361  Image src_pix, TabVector_LIST *vectors) {
362  if (pix_vline == nullptr || *pix_vline == nullptr) {
363  return;
364  }
365  C_BLOB_LIST line_cblobs;
366  BLOBNBOX_LIST line_bblobs;
367  GetLineBoxes(false, *pix_vline, pix_intersections, &line_cblobs, &line_bblobs);
368  int width = pixGetWidth(src_pix);
369  int height = pixGetHeight(src_pix);
370  ICOORD bleft(0, 0);
371  ICOORD tright(width, height);
372  FindLineVectors(bleft, tright, &line_bblobs, vertical_x, vertical_y, vectors);
373  if (!vectors->empty()) {
374  RemoveUnusedLineSegments(false, &line_bblobs, *pix_vline);
375  SubtractLinesAndResidue(*pix_vline, pix_non_vline, resolution, src_pix);
376  ICOORD vertical;
377  vertical.set_with_shrink(*vertical_x, *vertical_y);
378  TabVector::MergeSimilarTabVectors(vertical, vectors, nullptr);
379  } else {
380  pix_vline->destroy();
381  }
382 }
383 
384 // Finds horizontal line objects in pix_hline and removes them from src_pix.
385 // Uses the given resolution to determine size thresholds instead of any
386 // that may be present in the pix.
387 // The output vertical_x and vertical_y contain a sum of the output vectors,
388 // thereby giving the mean vertical direction.
389 // The output vectors are owned by the list and Frozen (cannot refit) by
390 // having no boxes, as there is no need to refit or merge separator lines.
391 // If no good lines are found, pix_hline is destroyed.
392 // None of the input pointers may be nullptr, and if *pix_hline is nullptr then
393 // the function does nothing.
394 void LineFinder::FindAndRemoveHLines(int resolution, Image pix_intersections, int vertical_x,
395  int vertical_y, Image *pix_hline, Image pix_non_hline,
396  Image src_pix, TabVector_LIST *vectors) {
397  if (pix_hline == nullptr || *pix_hline == nullptr) {
398  return;
399  }
400  C_BLOB_LIST line_cblobs;
401  BLOBNBOX_LIST line_bblobs;
402  GetLineBoxes(true, *pix_hline, pix_intersections, &line_cblobs, &line_bblobs);
403  int width = pixGetWidth(src_pix);
404  int height = pixGetHeight(src_pix);
405  ICOORD bleft(0, 0);
406  ICOORD tright(height, width);
407  FindLineVectors(bleft, tright, &line_bblobs, &vertical_x, &vertical_y, vectors);
408  if (!vectors->empty()) {
409  RemoveUnusedLineSegments(true, &line_bblobs, *pix_hline);
410  SubtractLinesAndResidue(*pix_hline, pix_non_hline, resolution, src_pix);
411  ICOORD vertical;
412  vertical.set_with_shrink(vertical_x, vertical_y);
413  TabVector::MergeSimilarTabVectors(vertical, vectors, nullptr);
414  // Iterate the vectors to flip them. x and y were flipped for horizontal
415  // lines, so FindLineVectors can work just with the vertical case.
416  // See GetLineBoxes for more on the flip.
417  TabVector_IT h_it(vectors);
418  for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) {
419  h_it.data()->XYFlip();
420  }
421  } else {
422  pix_hline->destroy();
423  }
424 }
425 
426 // Finds vertical lines in the given list of BLOBNBOXes. bleft and tright
427 // are the bounds of the image on which the input line_bblobs were found.
428 // The input line_bblobs list is const really.
429 // The output vertical_x and vertical_y are the total of all the vectors.
430 // The output list of TabVector makes no reference to the input BLOBNBOXes.
431 void LineFinder::FindLineVectors(const ICOORD &bleft, const ICOORD &tright,
432  BLOBNBOX_LIST *line_bblobs, int *vertical_x, int *vertical_y,
433  TabVector_LIST *vectors) {
434  BLOBNBOX_IT bbox_it(line_bblobs);
435  int b_count = 0;
436  // Put all the blobs into the grid to find the lines, and move the blobs
437  // to the output lists.
438  AlignedBlob blob_grid(kLineFindGridSize, bleft, tright);
439  for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) {
440  BLOBNBOX *bblob = bbox_it.data();
441  bblob->set_left_tab_type(TT_MAYBE_ALIGNED);
442  bblob->set_left_rule(bleft.x());
443  bblob->set_right_rule(tright.x());
444  bblob->set_left_crossing_rule(bleft.x());
445  bblob->set_right_crossing_rule(tright.x());
446  blob_grid.InsertBBox(false, true, bblob);
447  ++b_count;
448  }
449  if (b_count == 0) {
450  return;
451  }
452 
453  // Search the entire grid, looking for vertical line vectors.
454  BlobGridSearch lsearch(&blob_grid);
455  BLOBNBOX *bbox;
456  TabVector_IT vector_it(vectors);
457  *vertical_x = 0;
458  *vertical_y = 1;
459  lsearch.StartFullSearch();
460  while ((bbox = lsearch.NextFullSearch()) != nullptr) {
461  if (bbox->left_tab_type() == TT_MAYBE_ALIGNED) {
462  const TBOX &box = bbox->bounding_box();
463  if (AlignedBlob::WithinTestRegion(2, box.left(), box.bottom())) {
464  tprintf("Finding line vector starting at bbox (%d,%d)\n", box.left(), box.bottom());
465  }
466  AlignedBlobParams align_params(*vertical_x, *vertical_y, box.width());
467  TabVector *vector =
468  blob_grid.FindVerticalAlignment(align_params, bbox, vertical_x, vertical_y);
469  if (vector != nullptr) {
470  vector->Freeze();
471  vector_it.add_to_end(vector);
472  }
473  }
474  }
475 }
476 
477 // Returns a Pix music mask if music is detected.
478 // Any vertical line that has at least 5 intersections in sufficient density
479 // is taken to be a bar. Bars are used as a seed and the entire touching
480 // component is added to the output music mask and subtracted from the lines.
481 // Returns nullptr and does minimal work if no music is found.
482 static Image FilterMusic(int resolution, Image pix_closed, Image pix_vline, Image pix_hline,
483  bool &v_empty, bool &h_empty) {
484  int max_stave_height = static_cast<int>(resolution * kMaxStaveHeight);
485  Image intersection_pix = pix_vline & pix_hline;
486  Boxa *boxa = pixConnComp(pix_vline, nullptr, 8);
487  // Iterate over the boxes to find music bars.
488  int nboxes = boxaGetCount(boxa);
489  Image music_mask = nullptr;
490  for (int i = 0; i < nboxes; ++i) {
491  Box *box = boxaGetBox(boxa, i, L_CLONE);
492  l_int32 x, y, box_width, box_height;
493  boxGetGeometry(box, &x, &y, &box_width, &box_height);
494  int joins = NumTouchingIntersections(box, intersection_pix);
495  // Test for the join density being at least 5 per max_stave_height,
496  // ie (joins-1)/box_height >= (5-1)/max_stave_height.
497  if (joins >= 5 && (joins - 1) * max_stave_height >= 4 * box_height) {
498  // This is a music bar. Add to the mask.
499  if (music_mask == nullptr) {
500  music_mask = pixCreate(pixGetWidth(pix_vline), pixGetHeight(pix_vline), 1);
501  }
502  pixSetInRect(music_mask, box);
503  }
504  boxDestroy(&box);
505  }
506  boxaDestroy(&boxa);
507  intersection_pix.destroy();
508  if (music_mask != nullptr) {
509  // The mask currently contains just the bars. Use the mask as a seed
510  // and the pix_closed as the mask for a seedfill to get all the
511  // intersecting staves.
512  pixSeedfillBinary(music_mask, music_mask, pix_closed, 8);
513  // Filter out false positives. CCs in the music_mask should be the vast
514  // majority of the pixels in their bounding boxes, as we expect just a
515  // tiny amount of text, a few phrase marks, and crescendo etc left.
516  Boxa *boxa = pixConnComp(music_mask, nullptr, 8);
517  // Iterate over the boxes to find music components.
518  int nboxes = boxaGetCount(boxa);
519  for (int i = 0; i < nboxes; ++i) {
520  Box *box = boxaGetBox(boxa, i, L_CLONE);
521  Image rect_pix = pixClipRectangle(music_mask, box, nullptr);
522  l_int32 music_pixels;
523  pixCountPixels(rect_pix, &music_pixels, nullptr);
524  rect_pix.destroy();
525  rect_pix = pixClipRectangle(pix_closed, box, nullptr);
526  l_int32 all_pixels;
527  pixCountPixels(rect_pix, &all_pixels, nullptr);
528  rect_pix.destroy();
529  if (music_pixels < kMinMusicPixelFraction * all_pixels) {
530  // False positive. Delete from the music mask.
531  pixClearInRect(music_mask, box);
532  }
533  boxDestroy(&box);
534  }
535  boxaDestroy(&boxa);
536  if (music_mask.isZero()) {
537  music_mask.destroy();
538  } else {
539  pixSubtract(pix_vline, pix_vline, music_mask);
540  pixSubtract(pix_hline, pix_hline, music_mask);
541  // We may have deleted all the lines
542  v_empty = pix_vline.isZero();
543  h_empty = pix_hline.isZero();
544  }
545  }
546  return music_mask;
547 }
548 
549 // Most of the heavy lifting of line finding. Given src_pix and its separate
550 // resolution, returns image masks:
551 // pix_vline candidate vertical lines.
552 // pix_non_vline pixels that didn't look like vertical lines.
553 // pix_hline candidate horizontal lines.
554 // pix_non_hline pixels that didn't look like horizontal lines.
555 // pix_intersections pixels where vertical and horizontal lines meet.
556 // pix_music_mask candidate music staves.
557 // This function promises to initialize all the output (2nd level) pointers,
558 // but any of the returns that are empty will be nullptr on output.
559 // None of the input (1st level) pointers may be nullptr except pix_music_mask,
560 // which will disable music detection, and pixa_display.
561 void LineFinder::GetLineMasks(int resolution, Image src_pix, Image *pix_vline, Image *pix_non_vline,
562  Image *pix_hline, Image *pix_non_hline, Image *pix_intersections,
563  Image *pix_music_mask, Pixa *pixa_display) {
564  Image pix_closed = nullptr;
565  Image pix_hollow = nullptr;
566 
567  int max_line_width = resolution / kThinLineFraction;
568  int min_line_length = resolution / kMinLineLengthFraction;
569  if (pixa_display != nullptr) {
570  tprintf("Image resolution = %d, max line width = %d, min length=%d\n", resolution,
571  max_line_width, min_line_length);
572  }
573  int closing_brick = max_line_width / 3;
574 
575 // only use opencl if compiled w/ OpenCL and selected device is opencl
576 #ifdef USE_OPENCL
577  if (OpenclDevice::selectedDeviceIsOpenCL()) {
578  // OpenCL pixGetLines Operation
579  int clStatus =
580  OpenclDevice::initMorphCLAllocations(pixGetWpl(src_pix), pixGetHeight(src_pix), src_pix);
581  bool getpixclosed = pix_music_mask != nullptr;
582  OpenclDevice::pixGetLinesCL(nullptr, src_pix, pix_vline, pix_hline, &pix_closed, getpixclosed,
583  closing_brick, closing_brick, max_line_width, max_line_width,
584  min_line_length, min_line_length);
585  } else {
586 #endif
587  // Close up small holes, making it less likely that false alarms are found
588  // in thickened text (as it will become more solid) and also smoothing over
589  // some line breaks and nicks in the edges of the lines.
590  pix_closed = pixCloseBrick(nullptr, src_pix, closing_brick, closing_brick);
591  if (pixa_display != nullptr) {
592  pixaAddPix(pixa_display, pix_closed, L_CLONE);
593  }
594  // Open up with a big box to detect solid areas, which can then be
595  // subtracted. This is very generous and will leave in even quite wide
596  // lines.
597  Image pix_solid = pixOpenBrick(nullptr, pix_closed, max_line_width, max_line_width);
598  if (pixa_display != nullptr) {
599  pixaAddPix(pixa_display, pix_solid, L_CLONE);
600  }
601  pix_hollow = pixSubtract(nullptr, pix_closed, pix_solid);
602 
603  pix_solid.destroy();
604 
605  // Now open up in both directions independently to find lines of at least
606  // 1 inch/kMinLineLengthFraction in length.
607  if (pixa_display != nullptr) {
608  pixaAddPix(pixa_display, pix_hollow, L_CLONE);
609  }
610  *pix_vline = pixOpenBrick(nullptr, pix_hollow, 1, min_line_length);
611  *pix_hline = pixOpenBrick(nullptr, pix_hollow, min_line_length, 1);
612 
613  pix_hollow.destroy();
614 #ifdef USE_OPENCL
615  }
616 #endif
617 
618  // Lines are sufficiently rare, that it is worth checking for a zero image.
619  bool v_empty = pix_vline->isZero();
620  bool h_empty = pix_hline->isZero();
621  if (pix_music_mask != nullptr) {
622  if (!v_empty && !h_empty) {
623  *pix_music_mask =
624  FilterMusic(resolution, pix_closed, *pix_vline, *pix_hline, v_empty, h_empty);
625  } else {
626  *pix_music_mask = nullptr;
627  }
628  }
629  pix_closed.destroy();
630  Image pix_nonlines = nullptr;
631  *pix_intersections = nullptr;
632  Image extra_non_hlines = nullptr;
633  if (!v_empty) {
634  // Subtract both line candidates from the source to get definite non-lines.
635  pix_nonlines = pixSubtract(nullptr, src_pix, *pix_vline);
636  if (!h_empty) {
637  pixSubtract(pix_nonlines, pix_nonlines, *pix_hline);
638  // Intersections are a useful indicator for likelihood of being a line.
639  *pix_intersections = *pix_vline & *pix_hline;
640  // Candidate vlines are not hlines (apart from the intersections)
641  // and vice versa.
642  extra_non_hlines = pixSubtract(nullptr, *pix_vline, *pix_intersections);
643  }
644  *pix_non_vline = pixErodeBrick(nullptr, pix_nonlines, kMaxLineResidue, 1);
645  pixSeedfillBinary(*pix_non_vline, *pix_non_vline, pix_nonlines, 8);
646  if (!h_empty) {
647  // Candidate hlines are not vlines.
648  *pix_non_vline |= *pix_hline;
649  pixSubtract(*pix_non_vline, *pix_non_vline, *pix_intersections);
650  }
651  if (!FilterFalsePositives(resolution, *pix_non_vline, *pix_intersections, *pix_vline)) {
652  pix_vline->destroy(); // No candidates left.
653  }
654  } else {
655  // No vertical lines.
656  pix_vline->destroy();
657  *pix_non_vline = nullptr;
658  if (!h_empty) {
659  pix_nonlines = pixSubtract(nullptr, src_pix, *pix_hline);
660  }
661  }
662  if (h_empty) {
663  pix_hline->destroy();
664  *pix_non_hline = nullptr;
665  if (v_empty) {
666  return;
667  }
668  } else {
669  *pix_non_hline = pixErodeBrick(nullptr, pix_nonlines, 1, kMaxLineResidue);
670  pixSeedfillBinary(*pix_non_hline, *pix_non_hline, pix_nonlines, 8);
671  if (extra_non_hlines != nullptr) {
672  *pix_non_hline |= extra_non_hlines;
673  extra_non_hlines.destroy();
674  }
675  if (!FilterFalsePositives(resolution, *pix_non_hline, *pix_intersections, *pix_hline)) {
676  pix_hline->destroy(); // No candidates left.
677  }
678  }
679  if (pixa_display != nullptr) {
680  if (*pix_vline != nullptr) {
681  pixaAddPix(pixa_display, *pix_vline, L_CLONE);
682  }
683  if (*pix_hline != nullptr) {
684  pixaAddPix(pixa_display, *pix_hline, L_CLONE);
685  }
686  if (pix_nonlines != nullptr) {
687  pixaAddPix(pixa_display, pix_nonlines, L_CLONE);
688  }
689  if (*pix_non_vline != nullptr) {
690  pixaAddPix(pixa_display, *pix_non_vline, L_CLONE);
691  }
692  if (*pix_non_hline != nullptr) {
693  pixaAddPix(pixa_display, *pix_non_hline, L_CLONE);
694  }
695  if (*pix_intersections != nullptr) {
696  pixaAddPix(pixa_display, *pix_intersections, L_CLONE);
697  }
698  if (pix_music_mask != nullptr && *pix_music_mask != nullptr) {
699  pixaAddPix(pixa_display, *pix_music_mask, L_CLONE);
700  }
701  }
702  pix_nonlines.destroy();
703 }
704 
705 // Returns a list of boxes corresponding to the candidate line segments. Sets
706 // the line_crossings member of the boxes so we can later determine the number
707 // of intersections touched by a full line.
708 void LineFinder::GetLineBoxes(bool horizontal_lines, Image pix_lines, Image pix_intersections,
709  C_BLOB_LIST *line_cblobs, BLOBNBOX_LIST *line_bblobs) {
710  // Put a single pixel crack in every line at an arbitrary spacing,
711  // so they break up and the bounding boxes can be used to get the
712  // direction accurately enough without needing outlines.
713  int wpl = pixGetWpl(pix_lines);
714  int width = pixGetWidth(pix_lines);
715  int height = pixGetHeight(pix_lines);
716  l_uint32 *data = pixGetData(pix_lines);
717  if (horizontal_lines) {
718  for (int y = 0; y < height; ++y, data += wpl) {
719  for (int x = kCrackSpacing; x < width; x += kCrackSpacing) {
720  CLEAR_DATA_BIT(data, x);
721  }
722  }
723  } else {
724  for (int y = kCrackSpacing; y < height; y += kCrackSpacing) {
725  memset(data + wpl * y, 0, wpl * sizeof(*data));
726  }
727  }
728  // Get the individual connected components
729  Boxa *boxa = pixConnComp(pix_lines, nullptr, 8);
730  ConvertBoxaToBlobs(width, height, &boxa, line_cblobs);
731  // Make the BLOBNBOXes from the C_BLOBs.
732  C_BLOB_IT blob_it(line_cblobs);
733  BLOBNBOX_IT bbox_it(line_bblobs);
734  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
735  C_BLOB *cblob = blob_it.data();
736  auto *bblob = new BLOBNBOX(cblob);
737  bbox_it.add_to_end(bblob);
738  // Determine whether the line segment touches two intersections.
739  const TBOX &bbox = bblob->bounding_box();
740  Box *box = boxCreate(bbox.left(), bbox.bottom(), bbox.width(), bbox.height());
741  bblob->set_line_crossings(NumTouchingIntersections(box, pix_intersections));
742  boxDestroy(&box);
743  // Transform the bounding box prior to finding lines. To save writing
744  // two line finders, flip x and y for horizontal lines and re-use the
745  // tab-stop detection code. For vertical lines we still have to flip the
746  // y-coordinates to switch from leptonica coords to tesseract coords.
747  if (horizontal_lines) {
748  // Note that we have Leptonica coords stored in a Tesseract box, so that
749  // bbox.bottom(), being the MIN y coord, is actually the top, so to get
750  // back to Leptonica coords in RemoveUnusedLineSegments, we have to
751  // use height - box.right() as the top, which looks very odd.
752  TBOX new_box(height - bbox.top(), bbox.left(), height - bbox.bottom(), bbox.right());
753  bblob->set_bounding_box(new_box);
754  } else {
755  TBOX new_box(bbox.left(), height - bbox.top(), bbox.right(), height - bbox.bottom());
756  bblob->set_bounding_box(new_box);
757  }
758  }
759 }
760 
761 } // namespace tesseract.
@ TBOX
const double kMinMusicPixelFraction
Definition: linefind.cpp:60
const double kMaxStaveHeight
Definition: linefind.cpp:58
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
const int kCrackSpacing
Spacing of cracks across the page to break up tall vertical lines.
Definition: linefind.cpp:43
const double kThickLengthMultiple
Definition: linefind.cpp:54
const int kMinThickLineWidth
Definition: linefind.cpp:47
const double kMaxNonLineDensity
Definition: linefind.cpp:56
void outlines_to_blobs(BLOCK *block, ICOORD bleft, ICOORD tright, C_OUTLINE_LIST *outlines)
Definition: edgblob.cpp:460
const int kMinLineLengthFraction
Denominator of resolution makes min pixels to demand line lengths to be.
Definition: linefind.cpp:41
const int kMaxLineResidue
Definition: linefind.cpp:51
const int kLineFindGridSize
Grid size used by line finder. Not very critical.
Definition: linefind.cpp:45
@ TT_VLINE
Definition: blobbox.h:67
@ TT_MAYBE_ALIGNED
Definition: blobbox.h:65
const int kThinLineFraction
Denominator of resolution makes max pixel width to allow thin lines.
Definition: linefind.cpp:39
GridSearch< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > BlobGridSearch
Definition: blobgrid.h:30
TabType left_tab_type() const
Definition: blobbox.h:286
const TBOX & bounding_box() const
Definition: blobbox.h:239
void destroy()
Definition: image.cpp:32
C_BLOB_LIST * blob_list()
get blobs
Definition: ocrblock.h:123
integer coordinate
Definition: points.h:36
TDimension left() const
Definition: rect.h:82
TDimension height() const
Definition: rect.h:118
TDimension width() const
Definition: rect.h:126
TDimension top() const
Definition: rect.h:68
TDimension right() const
Definition: rect.h:89
TDimension bottom() const
Definition: rect.h:75
static bool WithinTestRegion(int detail_level, int x, int y)
static void ConvertBoxaToBlobs(int image_width, int image_height, Boxa **boxes, C_BLOB_LIST *blobs)
Definition: linefind.cpp:316
static void FindAndRemoveLines(int resolution, bool debug, Image pix, int *vertical_x, int *vertical_y, Image *pix_music_mask, TabVector_LIST *v_lines, TabVector_LIST *h_lines)
Definition: linefind.cpp:240
static void MergeSimilarTabVectors(const ICOORD &vertical, TabVector_LIST *vectors, BlobGrid *grid)
Definition: tabvector.cpp:352