tesseract  5.0.0
tesseract::ParagraphModelSmearer Class Reference

#include <paragraphs_internal.h>

Public Member Functions

 ParagraphModelSmearer (std::vector< RowScratchRegisters > *rows, int row_start, int row_end, ParagraphTheory *theory)
 
void Smear ()
 

Detailed Description

Definition at line 239 of file paragraphs_internal.h.

Constructor & Destructor Documentation

◆ ParagraphModelSmearer()

tesseract::ParagraphModelSmearer::ParagraphModelSmearer ( std::vector< RowScratchRegisters > *  rows,
int  row_start,
int  row_end,
ParagraphTheory *  theory 
)

Definition at line 1363 of file paragraphs.cpp.

1365  : theory_(theory), rows_(rows), row_start_(row_start), row_end_(row_end) {
1366  if (!AcceptableRowArgs(0, 0, __func__, rows, row_start, row_end)) {  // arguments rejected by the shared row-range check
1367  row_start_ = 0;  // collapse to the empty range [0, 0) so the row loop in Smear() runs zero times
1368  row_end_ = 0;
1369  return;  // open_models_ is left unresized on this path
1370  }
1371  open_models_.resize(open_models_.size() + row_end - row_start + 2);  // NOTE(review): presumably open_models_ is empty here, giving (row_end - row_start + 2) slots -- confirm
1372 }

Member Function Documentation

◆ Smear()

void tesseract::ParagraphModelSmearer::Smear ( )

Definition at line 1407 of file paragraphs.cpp.

1407  {
1408  CalculateOpenModels(row_start_, row_end_);
1409 
1410  // For each row which we're unsure about (that is, it is LT_UNKNOWN or
1411  // we have multiple LT_START hypotheses), see if there's a model that
1412  // was recently used (an "open" model) which might model it well.
1413  for (int i = row_start_; i < row_end_; i++) {
1414  RowScratchRegisters &row = (*rows_)[i];
1415  if (row.ri_->num_words == 0) {  // rows without any words are skipped entirely
1416  continue;
1417  }
1418 
1419  // Step One:
1420  // Figure out if there are "open" models which are left-aligned or
1421  // right-aligned. This is important for determining whether the
1422  // "first" word in a row would fit at the "end" of the previous row.
1423  bool left_align_open = false;
1424  bool right_align_open = false;
1425  for (auto &m : OpenModels(i)) {
1426  switch (m->justification()) {
1427  case JUSTIFICATION_LEFT:
1428  left_align_open = true;
1429  break;
1430  case JUSTIFICATION_RIGHT:
1431  right_align_open = true;
1432  break;
1433  default:  // any other justification value counts as both left and right
1434  left_align_open = right_align_open = true;
1435  }
1436  }
1437  // Step Two:
1438  // Use that knowledge to figure out if this row is likely to
1439  // start a paragraph.
1440  bool likely_start;
1441  if (i == 0) {  // no (*rows_)[i - 1] exists to compare against
1442  likely_start = true;
1443  } else {
1444  if ((left_align_open && right_align_open) || (!left_align_open && !right_align_open)) {  // both or neither open: accept a start under either justification
1445  likely_start = LikelyParagraphStart((*rows_)[i - 1], row, JUSTIFICATION_LEFT) ||
1446  LikelyParagraphStart((*rows_)[i - 1], row, JUSTIFICATION_RIGHT);
1447  } else if (left_align_open) {
1448  likely_start = LikelyParagraphStart((*rows_)[i - 1], row, JUSTIFICATION_LEFT);
1449  } else {
1450  likely_start = LikelyParagraphStart((*rows_)[i - 1], row, JUSTIFICATION_RIGHT);
1451  }
1452  }
1453 
1454  // Step Three:
1455  // If this text line seems like an obvious first line of an
1456  // open model, or an obvious continuation of an existing
1457  // modelled paragraph, mark it up.
1458  if (likely_start) {
1459  // Add Start Hypotheses for all Open models that fit.
1460  for (unsigned m = 0; m < OpenModels(i).size(); m++) {
1461  if (ValidFirstLine(rows_, i, OpenModels(i)[m])) {
1462  row.AddStartLine(OpenModels(i)[m]);
1463  }
1464  }
1465  } else {
1466  // Add relevant body line hypotheses.
1467  SetOfModels last_line_models;
1468  if (i > 0) {
1469  (*rows_)[i - 1].StrongHypotheses(&last_line_models);
1470  } else {  // NOTE(review): appears unreachable for i >= 0, since i == 0 always sets likely_start above -- confirm
1471  theory_->NonCenteredModels(&last_line_models);
1472  }
1473  for (auto model : last_line_models) {
1474  if (ValidBodyLine(rows_, i, model)) {
1475  row.AddBodyLine(model);
1476  }
1477  }
1478  }
1479 
1480  // Step Four:
1481  // If we're still quite unsure about this line, go through all
1482  // models in our theory and see if this row could be the start
1483  // of any of our models.
1484  if (row.GetLineType() == LT_UNKNOWN ||
1485  (row.GetLineType() == LT_START && !row.UniqueStartHypothesis())) {
1486  SetOfModels all_models;
1487  theory_->NonCenteredModels(&all_models);
1488  for (auto &all_model : all_models) {
1489  if (ValidFirstLine(rows_, i, all_model)) {
1490  row.AddStartLine(all_model);
1491  }
1492  }
1493  }
1494  // Step Five:
1495  // Since we may have updated the hypotheses about this row, we need
1496  // to recalculate the Open models for the rest of rows[i + 1, row_end)
1497  if (row.GetLineType() != LT_UNKNOWN) {  // recomputation is skipped while the row is still LT_UNKNOWN
1498  CalculateOpenModels(i + 1, row_end_);
1499  }
1500  }
1501 }
@ JUSTIFICATION_LEFT
Definition: publictypes.h:250
@ JUSTIFICATION_RIGHT
Definition: publictypes.h:252
std::vector< const ParagraphModel * > SetOfModels
bool ValidBodyLine(const std::vector< RowScratchRegisters > *rows, int row, const ParagraphModel *model)
bool ValidFirstLine(const std::vector< RowScratchRegisters > *rows, int row, const ParagraphModel *model)
void NonCenteredModels(SetOfModels *models)

The documentation for this class was generated from the following files: