tesseract  5.0.0
tesseract::DocumentCache Class Reference

#include <imagedata.h>

Public Member Functions

TESS_API DocumentCache (int64_t max_memory)
 
TESS_API ~DocumentCache ()
 
void Clear ()
 
TESS_API bool LoadDocuments (const std::vector< std::string > &filenames, CachingStrategy cache_strategy, FileReader reader)
 
bool AddToCache (DocumentData *data)
 
DocumentData * FindDocument (const std::string &document_name) const
 
const ImageData * GetPageBySerial (int serial)
 
const std::vector< DocumentData * > & documents () const
 
TESS_API int TotalPages ()
 

Detailed Description

Definition at line 288 of file imagedata.h.

Constructor & Destructor Documentation

◆ DocumentCache()

tesseract::DocumentCache::DocumentCache ( int64_t  max_memory)
explicit

Definition at line 604 of file imagedata.cpp.

604 : max_memory_(max_memory) {}

◆ ~DocumentCache()

tesseract::DocumentCache::~DocumentCache ( )

Definition at line 606 of file imagedata.cpp.

606  {
607  for (auto *document : documents_) {
608  delete document;
609  }
610 }

Member Function Documentation

◆ AddToCache()

bool tesseract::DocumentCache::AddToCache ( DocumentData *  data)

Definition at line 641 of file imagedata.cpp.

641  {
642  documents_.push_back(data);
643  return true;
644 }

◆ Clear()

void tesseract::DocumentCache::Clear ( )
inline

Definition at line 296 of file imagedata.h.

296  {
297  for (auto *document : documents_) {
298  delete document;
299  }
300  documents_.clear();
301  num_pages_per_doc_ = 0;
302  }

◆ documents()

const std::vector<DocumentData *>& tesseract::DocumentCache::documents ( ) const
inline

Definition at line 325 of file imagedata.h.

325  {
326  return documents_;
327  }

◆ FindDocument()

DocumentData * tesseract::DocumentCache::FindDocument ( const std::string &  document_name) const

Definition at line 647 of file imagedata.cpp.

648  {
649  for (auto *document : documents_) {
650  if (document->document_name() == document_name) {
651  return document;
652  }
653  }
654  return nullptr;
655 }

◆ GetPageBySerial()

const ImageData* tesseract::DocumentCache::GetPageBySerial ( int  serial)
inline

Definition at line 317 of file imagedata.h.

317  {
318  if (cache_strategy_ == CS_SEQUENTIAL) {
319  return GetPageSequential(serial);
320  } else {
321  return GetPageRoundRobin(serial);
322  }
323  }
@ CS_SEQUENTIAL
Definition: imagedata.h:49

◆ LoadDocuments()

bool tesseract::DocumentCache::LoadDocuments ( const std::vector< std::string > &  filenames,
CachingStrategy  cache_strategy,
FileReader  reader 
)

Definition at line 614 of file imagedata.cpp.

616  {
617  cache_strategy_ = cache_strategy;
618  int64_t fair_share_memory = 0;
619  // In the round-robin case, each DocumentData handles restricting its content
620  // to its fair share of memory. In the sequential case, DocumentCache
621  // determines which DocumentDatas are held entirely in memory.
622  if (cache_strategy_ == CS_ROUND_ROBIN) {
623  fair_share_memory = max_memory_ / filenames.size();
624  }
625  for (const auto &filename : filenames) {
626  auto *document = new DocumentData(filename);
627  document->SetDocument(filename.c_str(), fair_share_memory, reader);
628  AddToCache(document);
629  }
630  if (!documents_.empty()) {
631  // Try to get the first page now to verify the list of filenames.
632  if (GetPageBySerial(0) != nullptr) {
633  return true;
634  }
635  tprintf("Load of page 0 failed!\n");
636  }
637  return false;
638 }
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
@ CS_ROUND_ROBIN
Definition: imagedata.h:54
bool AddToCache(DocumentData *data)
Definition: imagedata.cpp:641
const ImageData * GetPageBySerial(int serial)
Definition: imagedata.h:317

◆ TotalPages()

int tesseract::DocumentCache::TotalPages ( )

Definition at line 659 of file imagedata.cpp.

659  {
660  if (cache_strategy_ == CS_SEQUENTIAL) {
661  // In sequential mode, we assume each doc has the same number of pages
662  // whether it is true or not.
663  if (num_pages_per_doc_ == 0) {
664  GetPageSequential(0);
665  }
666  return num_pages_per_doc_ * documents_.size();
667  }
668  int total_pages = 0;
669  for (auto *document : documents_) {
670  // We have to load a page to make NumPages() valid.
671  document->GetPage(0);
672  total_pages += document->NumPages();
673  }
674  return total_pages;
675 }

The documentation for this class was generated from the following files: