Obtains the number of pages contained in a document in HOCR format.
 

1 hocr.getNumberOfPages

<hocr.getNumberOfPages>
    <hocr_text /> +
</hocr.getNumberOfPages>
Example

Obtains the number of pages of a HOCR document and then prints the text of each page.

Copy
<xsql-script name='hocr.getNumberOfPages'>
    <body>
        <!-- Sample HOCR document: one "ocr_page" div holding two "ocr_line" spans. -->
        <set name='m_ocr_text'><![CDATA[
            <html xmlns="http://www.w3.org/1999/xhtml">
              <body>
                <div class="ocr_page" title="bbox 0 0 2548 3300; image /path/to/scanned/image.png">
                  <span class="ocr_line" title="bbox 659 143 863 177">Some Text</span>
                  <span class="ocr_line" title="bbox 723 275 916 324">More Text</span>
                </div>
              </body>
            </html>]]>
        </set>

        <!-- Count the pages of the HOCR document stored in m_ocr_text. -->
        <set name='m_numberofpages'>
            <hocr.getNumberOfPages>
                <m_ocr_text />
            </hocr.getNumberOfPages>
        </set>

        <!-- Print the text of every page of the same document. -->
        <!-- NOTE(review): the loop runs from 0 to m_numberofpages; confirm whether
             'end' is inclusive and whether page indices are zero-based. -->
        <for name='m_page' start='0' end='#m_numberofpages'>
            <do>
                <println>
                    <hocr.getTextFromPage page='#m_page'>
                        <m_ocr_text />
                    </hocr.getTextFromPage>
                </println>
            </do>
        </for>
    </body>
</xsql-script>