c++ ocr tesseract bitblt

How to make tesseract-ocr read from coordinates on a screen?


I have been looking for an example of how to write a class or function that reads text from the screen at specified coordinates.

Something simple that would use bitblt to capture the specified section of the screen and run tesseract on it. All done in memory without having to create image files to disk.

Tesseract seems to have a really poor API and requires a TIF image, of all things. As far as I can see, it can't even be made to accept an in-memory bitmap without extensive delving into its code.

Any help would be appreciated, an actual example would be ideal.


Solution

  • ![Screenshot of the example's OCR output](http://i.imgur.com/HaJ2zOI.png)

    Read on to see how to use Tesseract-OCR with images held in memory.

    #include <cstddef>
    #include <cstdint>
    #include <cstring>
    #include <fstream>
    #include <iostream>
    #include <memory>
    #include <stdexcept>
    #include <vector>

    #include <tesseract/baseapi.h>
    #include <leptonica/allheaders.h>
    
    #if defined _WIN32 || defined _WIN64
    #include <windows.h>
    #endif
    
    /**
     * Owning, in-memory pixel buffer together with its dimensions and bit depth.
     *
     * The buffer is exposed through GetPixels() so it can be handed to APIs that
     * consume raw image memory. All accessors are const and can be used on a
     * const Image.
     */
    class Image
    {
        private:
            std::vector<std::uint8_t> Pixels;   // raw pixel bytes, row after row
            std::uint32_t width, height;        // dimensions in pixels
            std::uint16_t BitsPerPixel;         // bit depth of a single pixel

            // Copies the rows of In into Out in reverse vertical order.
            void Flip(void* In, void* Out, int width, int height, unsigned int Bpp);

        public:
            #if defined _WIN32 || defined _WIN64
            // Captures the Width x Height region at (X, Y) of the given device context.
            explicit Image(HDC DC, int X, int Y, int Width, int Height);
            #endif

            /**
             * Builds an image by copying an existing, tightly packed pixel buffer.
             *
             * @param PixelData Pointer to Width * Height * (Bpp / 8) readable bytes,
             *                  rows stored without padding.
             * @param Width     Image width in pixels; must be positive.
             * @param Height    Image height in pixels; must be positive.
             * @param Bpp       Bits per pixel; must be a non-zero multiple of 8.
             * @throws std::invalid_argument if the pointer is null or a dimension or
             *         the bit depth is invalid.
             */
            explicit Image(const void* PixelData, int Width, int Height, std::uint16_t Bpp = 32)
                : Pixels(), width(0), height(0), BitsPerPixel(Bpp)
            {
                if (PixelData == nullptr || Width <= 0 || Height <= 0 || Bpp == 0 || Bpp % 8 != 0)
                    throw std::invalid_argument("INVALID PIXEL BUFFER DESCRIPTION.");

                this->width = static_cast<std::uint32_t>(Width);
                this->height = static_cast<std::uint32_t>(Height);

                // Widen before multiplying so large images do not overflow 32-bit math.
                const std::size_t ByteCount = static_cast<std::size_t>(Width) * static_cast<std::size_t>(Height) * (Bpp / 8u);
                const std::uint8_t* First = static_cast<const std::uint8_t*>(PixelData);
                this->Pixels.assign(First, First + ByteCount);
            }

            /** @return Bit depth of one pixel. */
            inline std::uint16_t GetBitsPerPixel() const {return this->BitsPerPixel;}

            /** @return Whole bytes occupied by one pixel (BitsPerPixel / 8). */
            inline std::uint16_t GetBytesPerPixel() const {return this->BitsPerPixel / 8;}

            /**
             * @return Bytes in one unpadded row: (BitsPerPixel / 8) * width.
             * NOTE(review): the result is narrowed to 16 bits, so rows wider than
             * 65535 bytes are truncated; widen the return type if such images occur.
             */
            inline std::uint16_t GetBytesPerScanLine() const {return static_cast<std::uint16_t>((this->BitsPerPixel / 8) * this->width);}

            /** @return Image width in pixels. */
            inline int GetWidth() const {return this->width;}

            /** @return Image height in pixels. */
            inline int GetHeight() const {return this->height;}

            /** @return Pointer to the first byte of the owned pixel buffer. */
            inline const std::uint8_t* GetPixels() const {return this->Pixels.data();}
    };
    
    /**
     * Copies an image row by row from In to Out, reversing the vertical order
     * (the last source row becomes the first destination row).
     *
     * Row length is width * 4 bytes when Bpp > 24; otherwise rows are treated as
     * 3 bytes per pixel plus (width % 4) bytes of padding.
     *
     * @param In     Source buffer holding `height` rows; must not overlap Out.
     * @param Out    Destination buffer of at least the same size.
     * @param width  Row width in pixels.
     * @param height Number of rows.
     * @param Bpp    Bits per pixel of the source data.
     *
     * Does nothing when a pointer is null or a dimension is not positive.
     */
    void Image::Flip(void* In, void* Out, int width, int height, unsigned int Bpp)
    {
       if (In == nullptr || Out == nullptr || width <= 0 || height <= 0)
          return;

       const std::size_t Columns = static_cast<std::size_t>(width);
       const std::size_t Rows = static_cast<std::size_t>(height);
       const std::size_t Chunk = (Bpp > 24 ? Columns * 4 : Columns * 3 + Columns % 4);

       const unsigned char* FirstRow = static_cast<const unsigned char*>(In);
       unsigned char* Destination = static_cast<unsigned char*>(Out);

       // Walk every source row from last to first. Counting rows (instead of
       // comparing the cursor against In) ensures the row at In is copied too
       // and never forms a pointer before the start of the buffer.
       for (std::size_t Row = 0; Row < Rows; ++Row)
       {
          const unsigned char* Source = FirstRow + Chunk * (Rows - 1 - Row);
          std::memcpy(Destination, Source, Chunk);
          Destination += Chunk;
       }
    }
    
    #if defined _WIN32 || defined _WIN64
    /**
     * Captures a rectangular region of a device context into a 32-bit pixel buffer.
     *
     * The region is copied with BitBlt into a temporary compatible bitmap, read
     * back with GetDIBits as a bottom-up DIB, and then flipped so that the stored
     * rows run top to bottom.
     *
     * @param DC     Source device context (for example one obtained via GetDC).
     * @param X      Left edge of the region, in DC coordinates.
     * @param Y      Top edge of the region, in DC coordinates.
     * @param Width  Region width in pixels; must be positive.
     * @param Height Region height in pixels; must be positive.
     * @throws std::invalid_argument if Width or Height is not positive.
     * @throws std::runtime_error if DC has no bitmap or any GDI call fails.
     */
    Image::Image(HDC DC, int X, int Y, int Width, int Height) : Pixels(), width(Width), height(Height), BitsPerPixel(32)
    {
        // Negative sizes would wrap around when stored in the unsigned members.
        if (Width <= 0 || Height <= 0)
            throw std::invalid_argument("IMAGE DIMENSIONS MUST BE POSITIVE.");

        BITMAP Bmp = {0};
        HBITMAP hBmp = reinterpret_cast<HBITMAP>(GetCurrentObject(DC, OBJ_BITMAP));

        if (GetObject(hBmp, sizeof(BITMAP), &Bmp) == 0)
            throw std::runtime_error("BITMAP DC NOT FOUND.");

        // Row stride rounded up to a multiple of 4 bytes, computed in size_t so
        // large captures do not overflow 32-bit arithmetic.
        const std::size_t Stride = ((static_cast<std::size_t>(Width) * BitsPerPixel + 31) / 32) * 4;
        const std::size_t data_size = Stride * static_cast<std::size_t>(Height);

        // Allocate before acquiring any GDI handle so an allocation failure
        // cannot leak one.
        std::vector<std::uint8_t> Data(data_size);
        this->Pixels.resize(data_size);

        BITMAPINFO Info = {};
        Info.bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
        Info.bmiHeader.biWidth = static_cast<LONG>(Width);
        Info.bmiHeader.biHeight = static_cast<LONG>(Height);   // positive height: bottom-up rows
        Info.bmiHeader.biPlanes = 1;
        Info.bmiHeader.biBitCount = BitsPerPixel;
        Info.bmiHeader.biCompression = BI_RGB;
        Info.bmiHeader.biSizeImage = static_cast<DWORD>(data_size);

        HDC ScreenDC = GetDC(nullptr);
        HDC MemDC = ScreenDC ? CreateCompatibleDC(ScreenDC) : nullptr;
        HBITMAP hSBmp = ScreenDC ? CreateCompatibleBitmap(ScreenDC, Width, Height) : nullptr;

        bool Captured = false;
        if (MemDC && hSBmp)
        {
            HGDIOBJ Previous = SelectObject(MemDC, hSBmp);
            const BOOL Copied = BitBlt(MemDC, 0, 0, Width, Height, DC, X, Y, SRCCOPY);

            // GetDIBits requires that the bitmap is not selected into a device
            // context, so put the DC's original bitmap back before reading.
            SelectObject(MemDC, Previous);

            Captured = Copied && GetDIBits(ScreenDC, hSBmp, 0, static_cast<UINT>(Height), Data.data(), &Info, DIB_RGB_COLORS) != 0;
        }

        // Release every handle that was acquired, whether or not the capture worked.
        if (hSBmp)
            DeleteObject(hSBmp);
        if (MemDC)
            DeleteDC(MemDC);
        if (ScreenDC)
            ReleaseDC(nullptr, ScreenDC);

        if (!Captured)
            throw std::runtime_error("FAILED TO CAPTURE SCREEN REGION.");

        this->Flip(Data.data(), this->Pixels.data(), Width, Height, BitsPerPixel);
    }
    #endif
    
    /**
     * Example driver: grabs a 200x200 pixel image, hands the raw pixels to
     * Tesseract, and prints the recognized text to standard output.
     *
     * @return 0 on success, 1 if Tesseract could not be initialized or produced
     *         no text.
     */
    int main()
    {
        #if defined _WIN32 || defined _WIN64
        HWND SomeWindowHandle = GetDesktopWindow();
        HDC DC = GetDC(SomeWindowHandle);

        Image Img = Image(DC, 0, 0, 200, 200); //screenshot of 0, 0, 200, 200..

        ReleaseDC(SomeWindowHandle, DC);
        #else
        // Placeholder: supply your own pixel buffer here; this needs an Image
        // constructor that accepts a pointer to pixels plus a width and height.
        Image Img = Image(some_pixel_pointer, 200, 200); //pointer to pixels..
        #endif

        std::unique_ptr<tesseract::TessBaseAPI> tesseract_ptr(new tesseract::TessBaseAPI());

        // Init takes the data path and the language as two separate arguments
        // and returns non-zero when the language data cannot be loaded.
        if (tesseract_ptr->Init("/tesseract/tessdata", "eng") != 0)
        {
            std::cerr<<"Could not initialize tesseract.\n";
            return 1;
        }

        tesseract_ptr->SetImage(Img.GetPixels(), Img.GetWidth(), Img.GetHeight(), Img.GetBytesPerPixel(), Img.GetBytesPerScanLine());

        // GetUTF8Text returns a buffer the caller must release with delete[].
        std::unique_ptr<char[]> utf8_text_ptr(tesseract_ptr->GetUTF8Text());

        // Streaming a null char* is undefined behaviour, so check first.
        if (!utf8_text_ptr)
        {
            std::cerr<<"Tesseract returned no text.\n";
            return 1;
        }

        std::cout<<utf8_text_ptr.get()<<"\n";

        return 0;
    }