Skip to content

Instantly share code, notes, and snippets.

@demensdeum
Last active August 29, 2025 15:34
Show Gist options
  • Select an option

  • Save demensdeum/a2162b780ed3b2f7c57e641de50ab3be to your computer and use it in GitHub Desktop.

Select an option

Save demensdeum/a2162b780ed3b2f7c57e641de50ab3be to your computer and use it in GitHub Desktop.
Gemini-based OCR (add your API key)
<!DOCTYPE html>
<html lang="en">
<!-- Document head: metadata, the Tailwind runtime, and a small page-level stylesheet. -->
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Image OCR Tool</title>
<!-- Tailwind is loaded from its CDN at runtime; the utility classes used on the page's elements rely on this script. -->
<script src="https://cdn.tailwindcss.com"></script>
<style>
/* Inter webfont (weights 400, 500 and 700) fetched from Google Fonts. */
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;700&display=swap');
/* Make the body a flex container at least one viewport tall so its content is centered both ways. */
body {
font-family: 'Inter', sans-serif;
background-color: #f3f4f6;
display: flex;
justify-content: center;
align-items: center;
min-height: 100vh;
}
</style>
</head>
<body class="bg-gray-100 p-4">
<!-- Main card: upload area, image preview, action button, and the loading / result / error panels the script toggles. -->
<div class="bg-white p-6 md:p-10 rounded-xl shadow-lg w-full max-w-2xl text-center">
<h1 class="text-3xl font-bold mb-4 text-gray-800">Image OCR</h1>
<p class="text-gray-600 mb-6">Upload an image and click "Recognize" to extract text.</p>
<!-- Upload area: a click anywhere in the dashed box is forwarded to the hidden file input. -->
<!-- NOTE(review): accept="image/*" offers every image type, while the hint text below names only JPG and PNG. -->
<div class="mb-6 border-dashed border-2 border-gray-300 rounded-lg p-6 hover:border-blue-400 transition-colors duration-200 cursor-pointer" onclick="document.getElementById('fileInput').click()">
<input type="file" id="fileInput" accept="image/*" class="hidden">
<p class="text-gray-500">Click to select an image</p>
<p class="text-xs text-gray-400 mt-1">Supported formats: JPG, PNG</p>
</div>
<!-- Preview of the chosen image; starts hidden and is revealed by the script once the file has been read. -->
<div id="imagePreview" class="mb-6 hidden">
<img id="previewImage" src="#" alt="Image preview" class="max-w-full h-auto mx-auto rounded-lg shadow-md">
</div>
<!-- Starts disabled; the script enables it after an image has been loaded into the preview. -->
<button id="recognizeButton" class="w-full bg-blue-600 hover:bg-blue-700 text-white font-medium py-3 rounded-lg shadow-md transition-colors duration-200 focus:outline-none focus:ring-2 focus:ring-blue-500 focus:ring-opacity-50 disabled:bg-blue-300" disabled>
Recognize Text
</button>
<!-- Spinner row; hidden by default and shown by the script while a recognition request is running. -->
<div id="loading" class="mt-6 hidden">
<div class="flex items-center justify-center">
<div class="animate-spin rounded-full h-8 w-8 border-t-2 border-b-2 border-blue-500"></div>
<p class="ml-3 text-gray-600">Processing image...</p>
</div>
</div>
<!-- Result panel: the script writes the recognized text into #resultText and un-hides the panel; #copyButton copies that text. -->
<div id="resultBox" class="mt-6 hidden">
<h2 class="text-xl font-semibold mb-2 text-gray-700">Extracted Text:</h2>
<div class="bg-gray-50 border border-gray-200 p-4 rounded-lg text-left whitespace-pre-wrap break-words">
<p id="resultText" class="text-gray-800"></p>
</div>
<button id="copyButton" class="mt-4 w-full bg-gray-200 hover:bg-gray-300 text-gray-800 font-medium py-2 rounded-lg transition-colors duration-200 focus:outline-none focus:ring-2 focus:ring-gray-400 focus:ring-opacity-50">
Copy Text
</button>
</div>
<!-- Error banner: the script's showError() fills #errorText and un-hides this box. -->
<div id="errorBox" class="mt-6 p-4 bg-red-100 border border-red-400 text-red-700 rounded-lg hidden">
<p id="errorText"></p>
</div>
</div>
<script>
// Look up each element the script works with once, up front.
const byId = (id) => document.getElementById(id);

// Image selection and preview.
const fileInput = byId('fileInput');
const imagePreview = byId('imagePreview');
const previewImage = byId('previewImage');

// Recognition trigger and progress indicator.
const recognizeButton = byId('recognizeButton');
const loading = byId('loading');

// Result panel and its copy button.
const resultBox = byId('resultBox');
const resultText = byId('resultText');
const copyButton = byId('copyButton');

// Error banner.
const errorBox = byId('errorBox');
const errorText = byId('errorText');
// Keep the preview and the "Recognize" button in sync with the current file selection.
fileInput.addEventListener('change', (event) => {
  const selectedFile = event.target.files[0];

  // Selection cleared: there is nothing to preview and nothing to recognize.
  if (!selectedFile) {
    imagePreview.classList.add('hidden');
    recognizeButton.disabled = true;
    return;
  }

  const previewReader = new FileReader();
  previewReader.onload = (loadEvent) => {
    // The data URL produced by the reader is used directly as the preview image source.
    previewImage.src = loadEvent.target.result;
    imagePreview.classList.remove('hidden');

    // Output belonging to a previously processed image is hidden again.
    errorBox.classList.add('hidden');
    resultBox.classList.add('hidden');

    recognizeButton.disabled = false;
  };
  previewReader.readAsDataURL(selectedFile);
});
/**
 * Reads a file and resolves with its base64-encoded contents.
 *
 * FileReader is callback-based; wrapping it in a Promise lets the click handler
 * await the read, so a read failure goes through the same try/catch/finally as
 * the network request instead of leaving the spinner on screen.
 *
 * @param {File} file - The file to read.
 * @returns {Promise<string>} The base64 payload, without the "data:...;base64," prefix.
 *   Rejects if the read fails or yields no payload.
 */
function readFileAsBase64(file) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = () => {
      // readAsDataURL yields "data:<mime>;base64,<payload>"; only the payload is sent to the API.
      const base64 = String(reader.result).split(',')[1];
      if (base64) {
        resolve(base64);
      } else {
        reject(new Error('The selected file is empty or could not be encoded.'));
      }
    };
    reader.onerror = () => {
      reject(reader.error ?? new Error('The selected file could not be read.'));
    };
    // Both handlers are attached before the read is started.
    reader.readAsDataURL(file);
  });
}

/**
 * Extracts a human-readable message from a failed API response.
 *
 * @param {Response} response - A fetch response whose `ok` flag is false.
 * @returns {Promise<string>} The `error.message` field of the JSON body, or an
 *   empty string when the body is not JSON or carries no such field.
 */
async function readApiErrorMessage(response) {
  try {
    const body = await response.json();
    return body?.error?.message ?? '';
  } catch (err) {
    // Best effort only: the caller still reports the HTTP status when no message is available.
    console.error(err);
    return '';
  }
}

// Sends the selected image to the Gemini API and shows the recognized text (or an error).
recognizeButton.addEventListener('click', async () => {
  const file = fileInput.files[0];
  if (!file) {
    showError('Please select an image first.');
    return;
  }

  // Enter the "busy" state and clear output left over from a previous run.
  recognizeButton.disabled = true;
  loading.classList.remove('hidden');
  resultBox.classList.add('hidden');
  errorBox.classList.add('hidden');

  const prompt = "What is the handwritten hexadecimal string in this image? Provide only the string, without any additional text or explanation.";
  // Empty in the published source: supply your own Gemini API key here.
  const apiKey = "";
  // NOTE(review): this is a dated preview model id; confirm it is still being served.
  const apiUrl = `https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-preview-05-20:generateContent?key=${apiKey}`;

  try {
    const base64ImageData = await readFileAsBase64(file);
    const payload = {
      contents: [
        {
          role: "user",
          parts: [
            { text: prompt },
            {
              inlineData: {
                mimeType: file.type,
                data: base64ImageData
              }
            }
          ]
        }
      ],
    };

    const response = await fetch(apiUrl, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(payload)
    });

    // Surface API-level failures (bad or missing key, quota, ...) instead of a generic message.
    if (!response.ok) {
      const detail = await readApiErrorMessage(response);
      showError(
        detail
          ? `Request failed (${response.status}): ${detail}`
          : `Request failed with status ${response.status}. Please try again.`
      );
      return;
    }

    const result = await response.json();
    // Trim before testing so a whitespace-only answer is treated as "nothing recognized".
    const text = result?.candidates?.[0]?.content?.parts?.[0]?.text?.trim();
    if (text) {
      resultText.textContent = text;
      resultBox.classList.remove('hidden');
    } else {
      showError('Could not recognize text. Please try again.');
    }
  } catch (err) {
    console.error(err);
    showError('An error occurred. Please try again.');
  } finally {
    // Always leave the busy state, including on early return and on read/network errors.
    loading.classList.add('hidden');
    // Only re-enable the button if a file is still selected.
    recognizeButton.disabled = fileInput.files.length === 0;
  }
});
/**
 * Copies text through the legacy `document.execCommand('copy')` path.
 *
 * Used as a fallback where the asynchronous Clipboard API is unavailable or
 * refuses the write.
 *
 * @param {string} text - The text to place on the clipboard.
 * @returns {boolean} True when the browser reports that the copy succeeded.
 */
function copyWithExecCommand(text) {
  const tempTextArea = document.createElement('textarea');
  tempTextArea.value = text;
  document.body.appendChild(tempTextArea);
  try {
    tempTextArea.select();
    return document.execCommand('copy');
  } catch (err) {
    console.error(err);
    return false;
  } finally {
    // Remove the helper element even if selecting or copying threw.
    document.body.removeChild(tempTextArea);
  }
}

// Copies the recognized text to the clipboard and briefly confirms it on the button.
copyButton.addEventListener('click', async () => {
  const textToCopy = resultText.textContent;
  if (!textToCopy) {
    return;
  }

  let copied = false;
  if (navigator.clipboard?.writeText) {
    try {
      await navigator.clipboard.writeText(textToCopy);
      copied = true;
    } catch (err) {
      // Permission denied or similar: fall through to the legacy path below.
      console.error(err);
    }
  }
  if (!copied) {
    copied = copyWithExecCommand(textToCopy);
  }

  // Only claim success when one of the two paths actually reported it.
  if (!copied) {
    showError('Could not copy the text. Please copy it manually.');
    return;
  }

  copyButton.textContent = 'Copied!';
  setTimeout(() => {
    copyButton.textContent = 'Copy Text';
  }, 2000);
});
/**
 * Shows a message in the error banner.
 *
 * Un-hides the error box and sets the given message as the text content of
 * the error paragraph.
 *
 * @param {string} message - The text to display to the user.
 * @returns {void}
 */
function showError(message) {
  // Reveal the banner, then fill in its text.
  errorBox.classList.remove('hidden');
  errorText.textContent = message;
}
</script>
</body>
</html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment