tesseract  5.0.0
tesseract::UNICHARMAP Class Reference

#include <unicharmap.h>

Public Member Functions

 UNICHARMAP ()
 
 ~UNICHARMAP ()
 
void insert (const char *const unichar_repr, UNICHAR_ID id)
 
UNICHAR_ID unichar_to_id (const char *const unichar_repr, int length) const
 
bool contains (const char *const unichar_repr, int length) const
 
int minmatch (const char *const unichar_repr) const
 
void clear ()
 

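Detailed Description

A byte-indexed trie that maps the string representation of a unichar to its UNICHAR_ID. Each level is an array of 256 UNICHARMAP_NODE entries, indexed by the value of one byte of the string; an entry holds the id for the string that ends at that byte and a pointer to the child array used for longer strings. Lookups treat an entry as present when its id is non-negative.

Definition at line 29 of file unicharmap.h.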

Constructor & Destructor Documentation

◆ UNICHARMAP()

tesseract::UNICHARMAP::UNICHARMAP ( )

Definition at line 27 of file unicharmap.cpp.

27 : nodes(nullptr) {}

◆ ~UNICHARMAP()

tesseract::UNICHARMAP::~UNICHARMAP ( )

Definition at line 29 of file unicharmap.cpp.

29  {
30  delete[] nodes;
31 }

Member Function Documentation

◆ clear()

void tesseract::UNICHARMAP::clear ( )

Definition at line 123 of file unicharmap.cpp.

123  {
124  delete[] nodes;
125  nodes = nullptr;
126 }

◆ contains()

bool tesseract::UNICHARMAP::contains (const char *const unichar_repr, int length) const

Definition at line 83 of file unicharmap.cpp.

83  {
84  if (unichar_repr == nullptr || *unichar_repr == '\0') {
85  return false;
86  }
87  if (length <= 0 || length > UNICHAR_LEN) {
88  return false;
89  }
90  int index = 0;
91  if (unichar_repr[index] == '\0') {
92  return false;
93  }
94  UNICHARMAP_NODE *current_nodes = nodes;
95 
96  while (current_nodes != nullptr && index + 1 < length && unichar_repr[index + 1] != '\0') {
97  current_nodes = current_nodes[static_cast<unsigned char>(unichar_repr[index])].children;
98  ++index;
99  }
100  return current_nodes != nullptr && (index + 1 >= length || unichar_repr[index + 1] == '\0') &&
101  current_nodes[static_cast<unsigned char>(unichar_repr[index])].id >= 0;
102 }
References UNICHAR_LEN (macro, defined at line 33 of file unichar.h).

◆ insert()

void tesseract::UNICHARMAP::insert (const char *const unichar_repr, UNICHAR_ID id)

Definition at line 59 of file unicharmap.cpp.

59  {
60  const char *current_char = unichar_repr;
61  if (*current_char == '\0') {
62  return;
63  }
64  UNICHARMAP_NODE **current_nodes_pointer = &nodes;
65  do {
66  if (*current_nodes_pointer == nullptr) {
67  *current_nodes_pointer = new UNICHARMAP_NODE[256];
68  }
69  if (current_char[1] == '\0') {
70  (*current_nodes_pointer)[static_cast<unsigned char>(*current_char)].id = id;
71  return;
72  }
73  current_nodes_pointer =
74  &((*current_nodes_pointer)[static_cast<unsigned char>(*current_char)].children);
75  ++current_char;
76  } while (true);
77 }

◆ minmatch()

int tesseract::UNICHARMAP::minmatch ( const char *const  unichar_repr) const

Definition at line 106 of file unicharmap.cpp.

106  {
107  const char *current_char = unichar_repr;
108  if (*current_char == '\0') {
109  return 0;
110  }
111  UNICHARMAP_NODE *current_nodes = nodes;
112 
113  while (current_nodes != nullptr && *current_char != '\0') {
114  if (current_nodes[static_cast<unsigned char>(*current_char)].id >= 0) {
115  return current_char + 1 - unichar_repr;
116  }
117  current_nodes = current_nodes[static_cast<unsigned char>(*current_char)].children;
118  ++current_char;
119  }
120  return 0;
121 }

◆ unichar_to_id()

UNICHAR_ID tesseract::UNICHARMAP::unichar_to_id (const char *const unichar_repr, int length) const

Definition at line 36 of file unicharmap.cpp.

36  {
37  UNICHARMAP_NODE *current_nodes = nodes;
38 
39  assert(*unichar_repr != '\0');
40  assert(length > 0 && length <= UNICHAR_LEN);
41 
42  int index = 0;
43  if (length <= 0 || unichar_repr[index] == '\0') {
44  return INVALID_UNICHAR_ID;
45  }
46  do {
47  if (index + 1 >= length || unichar_repr[index + 1] == '\0') {
48  return current_nodes[static_cast<unsigned char>(unichar_repr[index])].id;
49  }
50  current_nodes = current_nodes[static_cast<unsigned char>(unichar_repr[index])].children;
51  ++index;
52  } while (true);
53 }

The documentation for this class was generated from the following files: