Tesseract
Installation:
# Install dependencies
# The autotools pieces go in as one Homebrew call ...
build_tools=(automake autoconf autoconf-archive libtool)
brew install "${build_tools[@]}"
# ... followed by the remaining formulae, one call per formula.
for formula in pkgconfig icu4c leptonica gcc pango; do
  brew install "${formula}"
done
# Download Tesseract master (skipped when a checkout from an earlier run exists)
[ -d tesseract ] || git clone https://github.com/tesseract-ocr/tesseract/
# Ask Homebrew for machine-specific values instead of hard-coding them:
#  - Homebrew suffixes its GCC binaries with the installed major version
#    (gcc-7, g++-7, ...); fall back to 7 when the version cannot be determined.
#  - the icu4c prefix depends on where Homebrew itself is installed; fall back
#    to /usr/local/opt/icu4c when Homebrew cannot report it.
gcc_version=$(brew list --versions gcc | awk '{print $NF}')
gcc_major=${gcc_version%%.*}
icu_prefix=$(brew --prefix icu4c)
# compile; every step only runs when the previous one succeeded
cd tesseract &&
  ./autogen.sh &&
  ./configure CC="gcc-${gcc_major:-7}" CXX="g++-${gcc_major:-7}" \
    CPPFLAGS="-I${icu_prefix:-/usr/local/opt/icu4c}/include" \
    LDFLAGS="-L${icu_prefix:-/usr/local/opt/icu4c}/lib" &&
  make -j"$(sysctl -n hw.ncpu)" # one job per CPU; a bare -j puts no limit on parallel jobs
sudo make install # if desired
make training # if installed with training dependencies
# Tesseract should now be in /usr/local/bin
command -v tesseract
Usage:
# Download the necessary language data from:
#   https://github.com/tesseract-ocr/tesseract/wiki/Data-Files
# Point tesseract at the directory that holds the language data files:
export TESSDATA_PREFIX=~/Repositories/playground/tesseract/tessdata
# Run Tesseract to find text in images. The last argument is the output base
# name; every example uses its own so that one run does not overwrite the
# result of another.
tesseract -l eng image_eng.jpg output_eng
tesseract -l deu image_deu.jpg output_deu
# You can also combine languages with '+':
tesseract -l deu+eng image_multi.jpg output_multi
# Iterate through folder: run OCR on every regular file in the current
# directory, writing the result next to it under the same name minus its
# last extension.
for image in *; do
  # Skip directories and the literal '*' left behind when nothing matches.
  [ -f "$image" ] || continue
  # Quoted so that file names containing spaces or glob characters stay intact.
  tesseract -l eng "$image" "${image%.*}"
done