Building a Chrome Extension for Image OCR Using Python and Tesseract
This tutorial walks through creating a Chrome extension that lets the user pick an image in the extension popup, passes it through a content script to a background script where a Python‑backed Tesseract OCR engine can be plugged in (the sample code ships with a placeholder), and displays the recognized text. It covers the plugin's file structure, manifest configuration, JavaScript code, HTML UI, CSS styling, and installation steps.
In this guide we demonstrate how to develop a Chrome extension that performs OCR on images using Python and the Tesseract library, providing a complete workflow from file structure to deployment.
The extension consists of the following core files:
manifest.json
content_script.js
background_script.js
popup.html
popup.js
icon.png
popup.css
2.1 Create the plugin file structure
Place the files listed above in a dedicated directory.
2.2 Write manifest.json
{
"manifest_version": 2,
"name": "Smart Image OCR",
"version": "1.0",
"description": "A smart Chrome extension for OCR text recognition from images.",
"icons": {"16": "icon.png", "48": "icon.png", "128": "icon.png"},
"browser_action": {"default_icon": {"16": "icon.png", "48": "icon.png", "128": "icon.png"}, "default_popup": "popup.html"},
"permissions": ["activeTab", "storage"],
"content_scripts": [{"matches": ["http://*/*", "https://*/*"], "js": ["content_script.js"]}],
"background": {"scripts": ["background_script.js"], "persistent": false}
}
2.3 Write content_script.js
// Listen for messages from the popup
chrome.runtime.onMessage.addListener(function(request, sender, sendResponse) {
if (request.action === 'uploadImage') {
var imageData = request.imageData;
// Forward image data to background script for OCR
chrome.runtime.sendMessage({action: 'recognizeImage', imageData: imageData}, function(response) {
sendResponse({success: true, result: response.result});
});
return true;
}
});2.4 Write background_script.js
// Listen for OCR requests from content script
chrome.runtime.onMessage.addListener(function(request, sender, sendResponse) {
if (request.action === 'recognizeImage') {
var imageData = request.imageData;
var result = recognizeText(imageData);
sendResponse({result: result});
}
});
// Simple wrapper for Tesseract OCR (replace with real implementation)
function recognizeText(imageData) {
// OCR logic goes here, calling Tesseract library
return 'Hello, OCR!';
}2.5 Write popup.html
<!DOCTYPE html>
<html>
<head>
<title>Smart Image OCR</title>
<link rel="stylesheet" href="popup.css">
</head>
<body>
<input type="file" id="imageUpload" accept="image/*">
<button id="uploadButton">Upload Image</button>
<div id="result"></div>
<script src="popup.js"></script>
</body>
</html>
2.6 Write popup.js
document.addEventListener('DOMContentLoaded', function() {
var imageUpload = document.getElementById('imageUpload');
var uploadButton = document.getElementById('uploadButton');
var resultDiv = document.getElementById('result');
uploadButton.addEventListener('click', function() {
imageUpload.click();
});
imageUpload.addEventListener('change', function() {
var file = imageUpload.files[0];
if (file) {
var reader = new FileReader();
reader.onload = function(e) {
var imageData = e.target.result;
chrome.tabs.query({active: true, currentWindow: true}, function(tabs) {
chrome.tabs.sendMessage(tabs[0].id, {action: 'uploadImage', imageData: imageData}, function(response) {
if (response && response.success) {
resultDiv.textContent = 'OCR Result: ' + response.result;
} else {
resultDiv.textContent = 'Failed to recognize image.';
}
});
});
};
reader.readAsDataURL(file);
}
});
});2.7 Add CSS in popup.css
body { width: 200px; padding: 10px; }
input[type="file"] { display: none; }
button { margin-top: 10px; }
#result { margin-top: 10px; }
2.8 Add an icon file named icon.png to the extension directory.
3. Install and debug the extension
Open chrome://extensions/ in Chrome, enable Developer Mode, click “Load unpacked”, select the extension folder, and then use the toolbar icon to open the popup, upload an image, and view the OCR result.
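By following these steps you will have a working Chrome extension skeleton: the popup uploads an image, the content script relays it, and the background script answers. Note that recognizeText in background_script.js is only a placeholder that always returns 'Hello, OCR!'; to recognize real text you still need to replace it with a call to an actual OCR implementation, such as a Python service running Tesseract.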
Test Development Learning Exchange
Test Development Learning Exchange
How this landed with the community
Was this worth your time?
0 Comments
Thoughtful readers leave field notes, pushback, and hard-won operational detail here.