|
| 1 | +use tesseract_plumbing::Text; |
| 2 | +use tesseract_sys::{PageIteratorLevel, PolyBlockType}; |
| 3 | + |
/// Iterate through the OCR results of an image
///
/// As the results are ephemeral, the `next` function only yields references to these results.
/// ```
/// use tesseract::{Tesseract, iterator::TextlineResult};
/// use crate::tesseract::TesseractIteratorResult;
/// use tesseract_sys::PolyBlockType;
///
/// let mut tesseract = Tesseract::new(None, Some("eng")).unwrap();
/// let mut tesseract = tesseract.set_image("./img.png").expect("Failed to set image");
/// let mut tesseract = tesseract.recognize().unwrap();
/// let mut tesseract_iterator = tesseract.iterator().unwrap();
///
/// while let Some(textline) = tesseract_iterator.next::<TextlineResult>(None) {
///     assert_eq!(PolyBlockType::PT_FLOWING_TEXT, textline.block_type());
/// }
/// ```
// Field 0 is the wrapped plumbing-level iterator.
// Field 1 is a "first call pending" flag: `new` sets it to `true`, and the first call to
// `next` clears it and returns the current element without advancing the iterator.
pub struct ResultIterator(tesseract_plumbing::ResultIterator, bool);
| 22 | + |
/// Borrowed view of the iterator's current element, read at `PageIteratorLevel::RIL_BLOCK`.
pub struct BlockResult<'a>(&'a tesseract_plumbing::ResultIterator);
/// Borrowed view of the iterator's current element, read at `PageIteratorLevel::RIL_PARA`.
pub struct ParagraphResult<'a>(&'a tesseract_plumbing::ResultIterator);
/// Borrowed view of the iterator's current element, read at `PageIteratorLevel::RIL_TEXTLINE`.
pub struct TextlineResult<'a>(&'a tesseract_plumbing::ResultIterator);
/// Borrowed view of the iterator's current element, read at `PageIteratorLevel::RIL_WORD`.
pub struct WordResult<'a>(&'a tesseract_plumbing::ResultIterator);
/// Borrowed view of the iterator's current element, read at `PageIteratorLevel::RIL_SYMBOL`.
pub struct SymbolResult<'a>(&'a tesseract_plumbing::ResultIterator);
| 28 | + |
| 29 | +impl ResultIterator { |
| 30 | + pub(crate) fn new(iterator: tesseract_plumbing::ResultIterator) -> Self { |
| 31 | + Self(iterator, true) |
| 32 | + } |
| 33 | + pub fn next<'a, T>(&'a mut self, limit: Option<PageIteratorLevel>) -> Option<T> |
| 34 | + where |
| 35 | + T: TesseractIteratorResult<'a>, |
| 36 | + { |
| 37 | + let p = *self.0.as_ref(); |
| 38 | + if self.1 { |
| 39 | + self.1 = false; |
| 40 | + return Some(T::from(&self.0)); |
| 41 | + } |
| 42 | + let end_of_page_reached = |
| 43 | + unsafe { tesseract_sys::TessPageIteratorNext(p.cast(), T::LEVEL as u32) == 0 }; |
| 44 | + if end_of_page_reached { |
| 45 | + return None; |
| 46 | + } |
| 47 | + Some(T::from(&self.0)) |
| 48 | + } |
| 49 | +} |
| 50 | + |
| 51 | +pub trait TesseractIteratorResult<'a> |
| 52 | +where |
| 53 | + Self: From<&'a tesseract_plumbing::ResultIterator> |
| 54 | + + AsRef<tesseract_plumbing::ResultIterator> |
| 55 | + + 'a, |
| 56 | +{ |
| 57 | + /// The equivalent PageIteratorLevel of this result |
| 58 | + const LEVEL: PageIteratorLevel; |
| 59 | + /// Get the text contained of the iteration result |
| 60 | + fn get_text(&self) -> Option<Text> { |
| 61 | + let c_str = unsafe { |
| 62 | + tesseract_sys::TessResultIteratorGetUTF8Text( |
| 63 | + self.as_ref().as_ref().cast(), |
| 64 | + Self::LEVEL as u32, |
| 65 | + ) |
| 66 | + }; |
| 67 | + if c_str.is_null() { |
| 68 | + return None; |
| 69 | + } |
| 70 | + Some(unsafe { tesseract_plumbing::Text::new(c_str) }) |
| 71 | + } |
| 72 | + |
| 73 | + /// Get the bounding box of the iteration result |
| 74 | + fn bounding_box(&self) -> BoundingBox { |
| 75 | + let mut left = 0; |
| 76 | + let mut right = 0; |
| 77 | + let mut top = 0; |
| 78 | + let mut bottom = 0; |
| 79 | + // TODO: Use this to verify |
| 80 | + let _object_at_pos = unsafe { |
| 81 | + tesseract_sys::TessPageIteratorBoundingBox( |
| 82 | + self.as_ref().as_ref().cast(), |
| 83 | + Self::LEVEL as u32, |
| 84 | + &mut left, |
| 85 | + &mut top, |
| 86 | + &mut right, |
| 87 | + &mut bottom, |
| 88 | + ) |
| 89 | + }; |
| 90 | + BoundingBox { |
| 91 | + left, |
| 92 | + top, |
| 93 | + right, |
| 94 | + bottom, |
| 95 | + } |
| 96 | + } |
| 97 | + |
| 98 | + /// Check what the current block type is |
| 99 | + fn block_type(&self) -> PolyBlockType { |
| 100 | + let block_type = |
| 101 | + unsafe { tesseract_sys::TessPageIteratorBlockType(self.as_ref().as_ref().cast()) }; |
| 102 | + unsafe { std::mem::transmute(block_type) } // TODO: This doesn't check that the value is valid |
| 103 | + } |
| 104 | +} |
| 105 | + |
/// Rectangle reported for an iteration result.
///
/// The four values are exactly what `TessPageIteratorBoundingBox` wrote into its `left`,
/// `top`, `right` and `bottom` out-parameters.
/// NOTE(review): presumably image pixel coordinates; confirm against the Tesseract docs.
///
/// The fields are public so that callers can actually read the box they are handed, and
/// the type is `Copy` and comparable because it is plain integer data.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct BoundingBox {
    /// Left edge.
    pub left: i32,
    /// Top edge.
    pub top: i32,
    /// Right edge.
    pub right: i32,
    /// Bottom edge.
    pub bottom: i32,
}
| 113 | + |
| 114 | +/// All results implement the same basic functionality, but with slight differences in the PageIteratorLevel |
| 115 | +macro_rules! result_impls { |
| 116 | + ($name:ident -> $level:expr $(, $($tts:tt)*)?) => { |
| 117 | + |
| 118 | + impl<'a> TesseractIteratorResult<'a> for $name<'a> |
| 119 | + where Self: From<&'a tesseract_plumbing::ResultIterator> + AsRef<tesseract_plumbing::ResultIterator> + 'a |
| 120 | + { |
| 121 | + const LEVEL: PageIteratorLevel = $level; |
| 122 | + } |
| 123 | + |
| 124 | + impl<'a> From<&'a tesseract_plumbing::ResultIterator> for $name<'a> { |
| 125 | + fn from(value: &'a tesseract_plumbing::ResultIterator) -> $name<'a> { |
| 126 | + $name(value) |
| 127 | + } |
| 128 | + } |
| 129 | + |
| 130 | + impl AsRef<tesseract_plumbing::ResultIterator> for $name<'_> { |
| 131 | + fn as_ref(&self) -> &tesseract_plumbing::ResultIterator { |
| 132 | + self.0 |
| 133 | + } |
| 134 | + } |
| 135 | + |
| 136 | + result_impls!($($($tts)*)?); |
| 137 | + }; |
| 138 | + () => {}; |
| 139 | +} |
| 140 | + |
| 141 | +result_impls!( |
| 142 | + BlockResult -> PageIteratorLevel::RIL_BLOCK, |
| 143 | + ParagraphResult -> PageIteratorLevel::RIL_PARA, |
| 144 | + TextlineResult -> PageIteratorLevel::RIL_TEXTLINE, |
| 145 | + WordResult -> PageIteratorLevel::RIL_WORD, |
| 146 | + SymbolResult -> PageIteratorLevel::RIL_SYMBOL |
| 147 | +); |
0 commit comments