效果预览
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Tika OCR在线识别工具</title>
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap" rel="stylesheet">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0-beta3/css/all.min.css">
<style>
/* Design tokens: colors, shadow and transition reused by the rules below through var(). */
:root {
--primary-color: #3a86ff;
--primary-dark: #2667cc;
--primary-light: #e0ebff;
--secondary-color: #ff006e;
--success-color: #38b000;
--warning-color: #ffbe0b;
--error-color: #ff5252;
--dark-color: #1a1a2e;
--text-color: #333;
--text-light: #6c757d;
--bg-color: #f8f9fa;
--border-color: #e9ecef;
--card-shadow: 0 10px 30px rgba(0, 0, 0, 0.05);
--transition: all 0.3s ease;
}
/* Global reset: no default margins/padding, border-box sizing everywhere. */
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}
/* The body is a full-height flex column so .app-wrapper (flex: 1) can fill the viewport. */
body {
font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif;
background-color: var(--bg-color);
color: var(--text-color);
line-height: 1.6;
padding: 0;
min-height: 100vh;
display: flex;
flex-direction: column;
}
/* Centered page column, capped at 1000px wide. */
.app-wrapper {
max-width: 1000px;
width: 100%;
margin: 0 auto;
padding: 20px;
flex: 1;
}
/* Top bar: logo on the left, settings toggle on the right. */
.top-bar {
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: 20px;
padding: 10px 0;
}
.app-logo {
display: flex;
align-items: center;
font-weight: 600;
font-size: 18px;
color: var(--primary-color);
}
.app-logo i {
margin-right: 10px;
font-size: 24px;
}
/* Round icon button that shows/hides the settings panel. */
.settings-toggle {
background-color: white;
color: var(--primary-color);
border: 2px solid var(--primary-light);
width: 42px;
height: 42px;
border-radius: 50%;
display: flex;
align-items: center;
justify-content: center;
cursor: pointer;
transition: var(--transition);
box-shadow: 0 4px 10px rgba(58, 134, 255, 0.1);
}
/* Hover: invert the colors and lift the button slightly. */
.settings-toggle:hover {
background-color: var(--primary-color);
color: white;
transform: translateY(-2px);
box-shadow: 0 6px 15px rgba(58, 134, 255, 0.2);
}
.settings-toggle i {
font-size: 18px;
}
/* Main card; overflow is hidden so the gradient header follows the rounded corners. */
.container {
background-color: white;
border-radius: 16px;
box-shadow: var(--card-shadow);
overflow: hidden;
}
/* Gradient banner holding the page title and tagline. */
.header {
background: linear-gradient(135deg, var(--primary-color), #4361ee);
color: white;
padding: 40px 30px;
text-align: center;
position: relative;
overflow: hidden;
}
/* Decorative radial highlight; it is oversized (200%) and clipped by .header's overflow: hidden. */
.header::before {
content: '';
position: absolute;
top: -50%;
left: -50%;
width: 200%;
height: 200%;
background: radial-gradient(circle, rgba(255,255,255,0.1) 0%, rgba(255,255,255,0) 70%);
z-index: 0;
}
/* z-index: 1 keeps the text above the ::before overlay. */
.header h1 {
font-weight: 700;
font-size: 32px;
margin-bottom: 10px;
position: relative;
z-index: 1;
}
.header p {
opacity: 0.9;
font-size: 16px;
max-width: 600px;
margin: 0 auto;
position: relative;
z-index: 1;
}
/* Padded body of the card, below the banner. */
.content {
padding: 40px;
}
/* Settings panel: hidden by default; the script toggles the .active class to reveal it. */
.settings-panel {
background-color: white;
border-radius: 12px;
padding: 25px;
margin-bottom: 30px;
display: none;
animation: slideDown 0.4s ease-out;
box-shadow: 0 5px 20px rgba(0,0,0,0.05);
border: 1px solid var(--border-color);
}
/* Entry animation: fade in while sliding down 20px. */
@keyframes slideDown {
from { opacity: 0; transform: translateY(-20px); }
to { opacity: 1; transform: translateY(0); }
}
.settings-panel.active {
display: block;
}
.settings-panel h3 {
margin-bottom: 20px;
font-weight: 600;
color: var(--dark-color);
display: flex;
align-items: center;
font-size: 18px;
}
.settings-panel h3 i {
margin-right: 10px;
color: var(--primary-color);
}
/* One label + input pair. */
.form-group {
margin-bottom: 20px;
}
.form-group label {
display: block;
margin-bottom: 8px;
font-weight: 500;
color: var(--text-color);
font-size: 15px;
}
/* Full-width text input. */
.form-control {
width: 100%;
padding: 14px 16px;
border: 1px solid var(--border-color);
border-radius: 10px;
font-size: 15px;
transition: var(--transition);
background-color: #fafafa;
}
/* The native outline is removed; the colored border and box-shadow ring act as the focus indicator instead. */
.form-control:focus {
outline: none;
border-color: var(--primary-color);
box-shadow: 0 0 0 3px rgba(58, 134, 255, 0.1);
background-color: white;
}
/* Drop zone: a dashed card that also hosts the (invisible) file input. */
.upload-area {
  border: 2px dashed var(--border-color);
  border-radius: 16px;
  padding: 50px 20px;
  text-align: center;
  cursor: pointer;
  transition: var(--transition);
  background-color: white;
  margin-bottom: 30px;
  /* Positioning context for the absolutely positioned #fileInput. */
  position: relative;
}
/* Highlighted state: pointer hover, or .active while a file is dragged over the zone. */
.upload-area:hover,
.upload-area.active {
  border-color: var(--primary-color);
  background-color: var(--primary-light);
}
/*
 * Keyboard focus indicator. The file input is rendered with opacity: 0, which
 * also hides its native focus ring, so the ring is drawn on the container
 * whenever the input inside it has focus.
 */
.upload-area:focus-within {
  border-color: var(--primary-color);
  background-color: var(--primary-light);
  outline: 3px solid rgba(58, 134, 255, 0.4);
  outline-offset: 2px;
}
.upload-icon {
  font-size: 60px;
  color: var(--primary-color);
  margin-bottom: 20px;
  transition: var(--transition);
}
/* Nudge the icon upwards while the zone is hovered. */
.upload-area:hover .upload-icon {
  transform: translateY(-5px);
}
.upload-text {
  font-size: 20px;
  margin-bottom: 10px;
  color: var(--text-color);
  font-weight: 500;
}
.upload-hint {
  font-size: 14px;
  color: var(--text-light);
}
/*
 * Transparent file input stretched over the whole drop zone, so a click
 * anywhere inside the zone opens the native file picker.
 */
#fileInput {
  position: absolute;
  top: 0;
  left: 0;
  width: 100%;
  height: 100%;
  opacity: 0;
  cursor: pointer;
  z-index: 10;
}
/* Primary action button (used by the "start recognition" button). */
.btn {
display: inline-block;
background-color: var(--primary-color);
color: white;
border: none;
padding: 14px 28px;
font-size: 16px;
border-radius: 10px;
cursor: pointer;
transition: var(--transition);
font-weight: 600;
text-align: center;
}
.btn:hover {
background-color: var(--primary-dark);
transform: translateY(-2px);
box-shadow: 0 6px 15px rgba(58, 134, 255, 0.2);
}
/* Pressed: drop back to the resting position. */
.btn:active {
transform: translateY(0);
}
/* Disabled: grey, no lift, no shadow. Declared after :hover so it wins at equal specificity. */
.btn:disabled {
background-color: #d1d1d1;
cursor: not-allowed;
transform: none;
box-shadow: none;
}
.btn i {
margin-right: 10px;
}
/* Progress indicator: hidden by default; the script switches its inline display to 'block' while a request runs. */
.loading {
display: none;
text-align: center;
margin: 30px 0;
}
/* Ring with one colored edge, rotated endlessly by the spin animation. */
.spinner {
display: inline-block;
width: 50px;
height: 50px;
border: 4px solid rgba(58, 134, 255, 0.1);
border-radius: 50%;
border-top-color: var(--primary-color);
animation: spin 1s linear infinite;
}
@keyframes spin {
to { transform: rotate(360deg); }
}
.loading-text {
margin-top: 15px;
color: var(--text-color);
font-size: 16px;
font-weight: 500;
}
/* Card that holds the recognized text and the image preview. */
.result-section {
margin-top: 40px;
background-color: white;
border-radius: 16px;
padding: 30px;
box-shadow: var(--card-shadow);
border: 1px solid var(--border-color);
}
/* Heading row: title on the left, copy button on the right. */
.result-header {
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: 20px;
padding-bottom: 15px;
border-bottom: 1px solid var(--border-color);
}
.result-header h2 {
font-weight: 600;
color: var(--dark-color);
font-size: 20px;
}
.copy-btn {
background-color: white;
border: 1px solid var(--border-color);
border-radius: 8px;
padding: 10px 16px;
font-size: 14px;
color: var(--text-color);
cursor: pointer;
transition: var(--transition);
display: flex;
align-items: center;
font-weight: 500;
}
.copy-btn:hover {
background-color: var(--primary-light);
border-color: var(--primary-color);
color: var(--primary-color);
}
.copy-btn i {
margin-right: 8px;
}
/* Text output box; pre-wrap keeps the line breaks of the text while still wrapping long lines. */
.result-container {
background-color: #fafafa;
border: 1px solid var(--border-color);
border-radius: 12px;
padding: 25px;
min-height: 200px;
white-space: pre-wrap;
line-height: 1.7;
font-size: 16px;
color: var(--text-color);
overflow-wrap: break-word;
}
/* Frame around the preview of the selected image. */
.image-preview {
margin-top: 30px;
text-align: center;
background-color: #fafafa;
border-radius: 12px;
padding: 20px;
border: 1px solid var(--border-color);
}
/* Scale the preview down to fit the frame, at most 400px tall. */
.image-preview img {
max-width: 100%;
max-height: 400px;
border-radius: 8px;
box-shadow: 0 4px 15px rgba(0,0,0,0.1);
}
/* Toast pinned to the top-right corner; parked off-screen to the right until .show is added. */
.notification {
position: fixed;
top: 20px;
right: 20px;
padding: 16px 25px;
border-radius: 10px;
background-color: var(--success-color);
color: white;
box-shadow: 0 5px 15px rgba(0,0,0,0.15);
transform: translateX(150%);
transition: transform 0.4s cubic-bezier(0.175, 0.885, 0.32, 1.275);
z-index: 1000;
font-weight: 500;
display: flex;
align-items: center;
}
/* Leading icon, drawn as a glyph from the Font Awesome 6 Free font (weight 900). */
.notification::before {
content: '\f058';
font-family: 'Font Awesome 6 Free';
font-weight: 900;
margin-right: 10px;
font-size: 18px;
}
/* Visible state: slide back to the resting position. */
.notification.show {
transform: translateX(0);
}
/* Error variant: error background color and a different glyph. */
.notification.error {
background-color: var(--error-color);
}
.notification.error::before {
content: '\f057';
}
.footer {
text-align: center;
padding: 20px 0;
color: var(--text-light);
font-size: 14px;
margin-top: 30px;
}
/* Viewports up to 768px wide: tighter padding and smaller type. */
@media (max-width: 768px) {
.content {
padding: 25px 20px;
}
.header {
padding: 30px 20px;
}
.header h1 {
font-size: 26px;
}
.upload-text {
font-size: 18px;
}
.upload-icon {
font-size: 50px;
}
.result-container {
padding: 20px;
font-size: 15px;
}
}
/* Viewports up to 480px wide: full-width primary button, result header stacked vertically. */
@media (max-width: 480px) {
.app-wrapper {
padding: 15px;
}
.header h1 {
font-size: 22px;
}
.btn {
width: 100%;
}
.result-header {
flex-direction: column;
align-items: flex-start;
}
.copy-btn {
margin-top: 10px;
}
}
</style>
</head>
<body>
<div class="app-wrapper">
  <!-- Top bar: brand mark and the button that shows/hides the settings panel. -->
  <div class="top-bar">
    <div class="app-logo">
      <i class="fas fa-file-alt" aria-hidden="true"></i>
      <span>Tika OCR</span>
    </div>
    <!-- Icon-only button: aria-label gives it an accessible name (title alone is not reliable). -->
    <button type="button" class="settings-toggle" id="settingsToggle" title="设置" aria-label="设置">
      <i class="fas fa-cog" aria-hidden="true"></i>
    </button>
  </div>
  <div class="container">
    <div class="header">
      <h1>Tika OCR在线识别工具</h1>
      <p>快速、准确地从图片中提取文字内容</p>
    </div>
    <main class="content">
      <!-- Settings panel: hidden until the script adds the "active" class. -->
      <div class="settings-panel" id="settingsPanel">
        <h3><i class="fas fa-cog" aria-hidden="true"></i> 识别设置</h3>
        <div class="form-group">
          <label for="tikaUrl">Tika服务器地址</label>
          <input type="url" id="tikaUrl" class="form-control" value="http://127.0.0.1:9998" placeholder="输入Tika服务器地址">
        </div>
        <div class="form-group">
          <label for="language">识别语言</label>
          <input type="text" id="language" class="form-control" value="eng" placeholder="例如:eng, chi_sim">
        </div>
      </div>
      <!-- Drop zone: the transparent file input covers the whole area (see #fileInput in the stylesheet). -->
      <div class="upload-area" id="uploadArea">
        <input type="file" id="fileInput" accept="image/*" aria-label="选择要识别的图片">
        <div class="upload-icon">
          <i class="fas fa-cloud-upload-alt" aria-hidden="true"></i>
        </div>
        <div class="upload-text">点击或拖拽图片到此处上传</div>
        <div class="upload-hint">支持 JPG, PNG, GIF, BMP 等格式</div>
      </div>
      <!-- Stays disabled until an image has been selected. -->
      <button type="button" id="recognizeBtn" class="btn" disabled>
        <i class="fas fa-magic" aria-hidden="true"></i> 开始识别
      </button>
      <div class="loading" id="loading">
        <div class="spinner"></div>
        <div class="loading-text">正在识别中,请稍候...</div>
      </div>
      <div class="result-section">
        <div class="result-header">
          <h2>识别结果</h2>
          <button type="button" id="copyBtn" class="copy-btn">
            <i class="far fa-copy" aria-hidden="true"></i> 复制文本
          </button>
        </div>
        <!-- Keep this text on one line with no surrounding whitespace: the box uses white-space: pre-wrap and the copy handler compares textContent against this exact placeholder. -->
        <div id="result" class="result-container">请上传图片后点击识别按钮</div>
        <div id="imagePreview" class="image-preview"></div>
      </div>
    </main>
  </div>
  <footer class="footer">
    <p>Tika OCR © <span id="currentYear"></span> All rights reserved</p>
  </footer>
</div>
<!-- Toast: a live region so assistive technology announces each new message. -->
<div id="notification" class="notification" role="status" aria-live="polite">
  <span id="notificationText">文本已复制到剪贴板</span>
</div>
<script>
document.addEventListener('DOMContentLoaded', function() {
// Look up every element the script talks to, by id.
const byId = (id) => document.getElementById(id);
const uploadArea = byId('uploadArea');
const fileInput = byId('fileInput');
const recognizeBtn = byId('recognizeBtn');
const resultDiv = byId('result');
const loadingDiv = byId('loading');
const tikaUrlInput = byId('tikaUrl');
const languageInput = byId('language');
const copyBtn = byId('copyBtn');
const notification = byId('notification');
const notificationText = byId('notificationText');
const imagePreview = byId('imagePreview');
const settingsToggle = byId('settingsToggle');
const settingsPanel = byId('settingsPanel');
const currentYearSpan = byId('currentYear');

// Footer copyright year.
currentYearSpan.textContent = new Date().getFullYear();

// The image most recently accepted by handleFile; null until one is chosen.
let currentFile = null;

// Show/hide the settings panel.
settingsToggle.addEventListener('click', function () {
  settingsPanel.classList.toggle('active');
});

// Drag and drop: highlight the zone while a drag hovers over it and take the
// first dropped file. preventDefault on dragover is what allows the drop.
uploadArea.addEventListener('dragover', function (event) {
  event.preventDefault();
  uploadArea.classList.add('active');
});
uploadArea.addEventListener('dragleave', function () {
  uploadArea.classList.remove('active');
});
uploadArea.addEventListener('drop', function (event) {
  event.preventDefault();
  uploadArea.classList.remove('active');
  const dropped = event.dataTransfer.files;
  if (dropped.length > 0) {
    handleFile(dropped[0]);
  }
});

// File picker: take the first selected file.
fileInput.addEventListener('change', function () {
  const picked = fileInput.files;
  if (picked && picked.length > 0) {
    handleFile(picked[0]);
  }
});
// Run OCR on the selected image: read it as a data URL, send the payload to
// Tika via performOCR, tidy the text with formatOCRText and show the result.
recognizeBtn.addEventListener('click', async () => {
  if (!currentFile) return;
  // Lock the UI for the duration of the request.
  recognizeBtn.disabled = true;
  loadingDiv.style.display = 'block';
  resultDiv.textContent = '';
  try {
    const tikaUrl = tikaUrlInput.value.trim();
    const language = languageInput.value.trim();
    // Promisified FileReader: a failed read now rejects and lands in the catch
    // below, instead of leaving the button disabled and the spinner visible.
    const dataUrl = await new Promise((resolve, reject) => {
      const reader = new FileReader();
      reader.onload = () => resolve(reader.result);
      reader.onerror = () => reject(reader.error || new Error('文件读取失败'));
      reader.readAsDataURL(currentFile);
    });
    // A data URL is "data:<mime>;base64,<payload>"; keep only the payload.
    const base64Data = dataUrl.split(',')[1];
    const text = await performOCR(base64Data, tikaUrl, language);
    const formattedText = formatOCRText(text);
    resultDiv.textContent = formattedText || "未识别到文字内容";
  } catch (error) {
    resultDiv.textContent = `识别出错: ${error.message}`;
    showNotification(`识别出错: ${error.message}`, true);
  } finally {
    // Always unlock the UI, whatever the outcome.
    recognizeBtn.disabled = false;
    loadingDiv.style.display = 'none';
  }
});
// Copy the recognized text to the clipboard. Nothing happens while the result
// box is empty or still shows one of the two placeholder messages.
copyBtn.addEventListener('click', () => {
  const text = resultDiv.textContent;
  if (!text || text === "请上传图片后点击识别按钮" || text === "未识别到文字内容") {
    return;
  }
  const onCopied = () => showNotification("文本已复制到剪贴板");
  const onFailed = (err) => showNotification("复制失败: " + err, true);

  // The async Clipboard API only exists in secure contexts (https, localhost).
  if (navigator.clipboard && navigator.clipboard.writeText) {
    navigator.clipboard.writeText(text).then(onCopied).catch(onFailed);
    return;
  }

  // Fallback for insecure origins (e.g. the page served over plain http):
  // select the text in an off-screen textarea and use the legacy copy command.
  const scratch = document.createElement('textarea');
  scratch.value = text;
  scratch.setAttribute('readonly', '');
  scratch.style.position = 'fixed';
  scratch.style.opacity = '0';
  document.body.appendChild(scratch);
  scratch.select();
  try {
    if (document.execCommand('copy')) {
      onCopied();
    } else {
      onFailed('浏览器不支持复制');
    }
  } catch (err) {
    onFailed(err);
  } finally {
    document.body.removeChild(scratch);
  }
});
/**
 * Accepts a user-chosen file: validates that it is an image, remembers it as
 * the current file, enables the recognize button, shows the file's name and
 * size in the result box and renders a preview.
 *
 * @param {File} file The file taken from the picker or from a drop.
 */
function handleFile(file) {
  // Anchor the MIME check so only "image/..." types pass (the previous
  // unanchored match accepted any type merely containing "image").
  if (!file || !/^image\//.test(file.type)) {
    showNotification("请选择图片文件", true);
    return;
  }
  currentFile = file;
  recognizeBtn.disabled = false;
  resultDiv.textContent = `已选择文件: ${file.name}\n大小: ${(file.size / 1024).toFixed(2)} KB\n\n请点击"开始识别"按钮进行文字识别`;
  const reader = new FileReader();
  reader.onload = function(e) {
    // A newer file was chosen while this one was still loading: drop the stale preview.
    if (currentFile !== file) return;
    // Build the <img> through the DOM instead of interpolating into innerHTML.
    const img = document.createElement('img');
    img.src = e.target.result;
    img.alt = "预览图";
    imagePreview.textContent = '';
    imagePreview.appendChild(img);
  };
  reader.readAsDataURL(file);
  showNotification(`已选择文件: ${file.name}`, false);
}
/**
 * Sends an image to a Tika server and resolves with the text it extracts.
 *
 * The image bytes are PUT to `<tikaUrl>/tika` as application/octet-stream,
 * with the OCR language passed in the X-Tika-OCRLanguage header.
 *
 * @param {string} base64Data Base64-encoded image bytes (no "data:" prefix).
 * @param {string} tikaUrl Base address of the Tika server; surrounding
 *   whitespace and trailing slashes are ignored.
 * @param {string} language OCR language code; falls back to 'eng' when empty.
 * @returns {Promise<string>} The plain-text response body.
 * @throws {Error} When the address is empty, the request fails, or the server
 *   answers with a non-2xx status.
 */
async function performOCR(base64Data, tikaUrl, language) {
  // Normalize the base address: "http://host:9998/" must not become "//tika".
  const baseUrl = (tikaUrl || '').trim().replace(/\/+$/, '');
  if (!baseUrl) {
    // An empty address would otherwise silently request "/tika" on the page's own origin.
    throw new Error('OCR识别失败: 未填写Tika服务器地址');
  }
  const url = `${baseUrl}/tika`;

  // Decode the base64 payload into raw bytes for the request body.
  const binaryData = atob(base64Data);
  const bytes = new Uint8Array(binaryData.length);
  for (let i = 0; i < binaryData.length; i++) {
    bytes[i] = binaryData.charCodeAt(i);
  }

  const headers = {
    'Accept': 'text/plain',
    'Content-Type': 'application/octet-stream',
    'X-Tika-OCRLanguage': language || 'eng'
  };
  try {
    const response = await fetch(url, {
      method: 'PUT',
      headers: headers,
      body: bytes
    });
    if (!response.ok) {
      throw new Error(`Tika服务器错误: ${response.status} ${response.statusText}`);
    }
    return await response.text();
  } catch (error) {
    // Network failures and HTTP errors alike surface with one common prefix.
    throw new Error(`OCR识别失败: ${error.message}`);
  }
}
/**
 * Heuristically tidies raw OCR output: pulls a probable title off the top,
 * re-joins lines that look like they were broken mid-sentence, and normalizes
 * some spacing. The tests rely on A-Z / a-z letters and a list of English
 * connecting words, so they only fire on Latin-script text.
 *
 * @param {string} text Raw text returned by the OCR request.
 * @returns {string} The cleaned-up text; "" when `text` is falsy.
 */
function formatOCRText(text) {
if (!text) return "";
let processedText = text.trim();
// Split on runs of "\n": blank lines made only of "\n" disappear here, while any "\r" stays attached to its line.
const lines = processedText.split(/\n+/);
let titleLines = [];
// Index of the first line that is NOT part of the title (0 = no title found).
let contentStartIndex = 0;
// Title detection: only the first (up to) four lines are examined.
for (let i = 0; i < Math.min(4, lines.length); i++) {
const line = lines[i].trim();
if (!line) continue;
// The first line counts as a title when it is short, starts with an uppercase letter and does not end in sentence punctuation.
if (i === 0 && line.length < 100 && /^[A-Z]/.test(line) && !/[.!?]$/.test(line)) {
titleLines.push(line);
contentStartIndex = i + 1;
continue;
}
// Following lines extend the title only if one already started (if line 0 was rejected, nothing below runs).
if (titleLines.length > 0) {
const lastTitleWord = titleLines[titleLines.length - 1].split(" ").pop().toLowerCase();
const firstWordOfLine = line.split(" ")[0].toLowerCase();
const connectingWords = ['of', 'and', 'the', 'to', 'in', 'on', 'with', 'by', 'as', 'for'];
// Same shape test as above, plus a connecting word at the seam (end of the previous title line or start of this one).
if (line.length < 100 && /^[A-Z]/.test(line) && !/[.!?]$/.test(line) &&
(connectingWords.includes(firstWordOfLine) || connectingWords.includes(lastTitleWord))) {
titleLines.push(line);
contentStartIndex = i + 1;
} else {
// First line that does not continue the title ends the scan.
break;
}
}
}
let title = titleLines.join(" ");
// Overlap check: if the first content line begins with the word the title ends with,
// treat that line as the rest of the title and merge it in without repeating the word.
if (contentStartIndex > 0 && contentStartIndex < lines.length) {
const firstContentLine = lines[contentStartIndex];
if (firstContentLine && firstContentLine.length > 10 &&
/^[A-Z]/.test(firstContentLine) &&
/[a-z]/.test(firstContentLine) &&
!firstContentLine.endsWith('.')) {
const potentialTitleWords = title.split(" ");
const potentialContentWords = firstContentLine.split(" ");
if (potentialContentWords.length >= 3 &&
potentialTitleWords[potentialTitleWords.length - 1].toLowerCase() ===
potentialContentWords[0].toLowerCase()) {
contentStartIndex++;
title = [...potentialTitleWords, ...potentialContentWords.slice(1)].join(" ");
}
}
}
// Everything after the title becomes the body, one "\n" between lines.
processedText = lines.slice(contentStartIndex).join("\n");
// Collapse 3+ consecutive "\n" into a blank line.
// NOTE(review): after the split on /\n+/ and the single-"\n" join above, such runs look impossible here - confirm whether this is dead code.
processedText = processedText.replace(/\n{3,}/g, '\n\n');
// A break between a letter/digit/soft punctuation mark and a lowercase letter is treated as a mid-sentence wrap: replace it with a space.
processedText = processedText.replace(/([a-zA-Z0-9,;:'"])[\n\r]+([a-z])/g, '$1 $2');
// A line that ends in one of these words (preceded by whitespace, any letter case) is joined to the next line.
const connectingWords = ['for', 'of', 'and', 'the', 'to', 'in', 'on', 'with', 'by', 'as', 'our', 'future'];
for (const word of connectingWords) {
const regex = new RegExp(`(\\s${word})[\n\r]+`, 'gi');
processedText = processedText.replace(regex, `$1 `);
}
// Insert the missing space after a comma squeezed between two letters.
processedText = processedText.replace(/([a-zA-Z]),([a-zA-Z])/g, '$1, $2');
// Insert the missing space after a period that is directly followed by an uppercase letter.
processedText = processedText.replace(/([a-zA-Z])\.([A-Z])/g, '$1. $2');
// Join any remaining break whose preceding character is not . ! ? or a double quote, i.e. the line did not end a sentence.
processedText = processedText.replace(/([^.!?"\n])[\n\r]+(?!\n)/g, '$1 ');
// Normalize runs of 2+ "\n" to exactly one blank line.
processedText = processedText.replace(/\n{2,}/g, '\n\n');
// Put the detected title back on top, separated from the body by a blank line.
if (title) {
processedText = title + '\n\n' + processedText;
}
return processedText.trim();
}
/**
 * Shows a toast message for three seconds.
 *
 * @param {string} message Text to display.
 * @param {boolean} [isError=false] When true, the toast uses its error style.
 */
function showNotification(message, isError = false) {
  notificationText.textContent = message;
  notification.classList.toggle('error', isError);
  notification.classList.add('show');
  // Cancel the hide timer of a toast that is still on screen; otherwise that
  // older timer would hide this newer message before its three seconds are up.
  clearTimeout(showNotification.hideTimer);
  showNotification.hideTimer = setTimeout(() => {
    notification.classList.remove('show');
  }, 3000);
}
});
</script>
</body>
</html>
Tika的Docker一键安装命令
迷你版(仅包含 Tika 核心组件,不含 Tesseract OCR,无法识别图片中的文字)
docker run --restart=always -it -d --name tika -p 9998:9998 apache/tika:latest
全量版(内置 Tesseract OCR,使用本工具识别图片需安装此版本)
docker run --restart=always -it -d --name tika -p 9998:9998 apache/tika:latest-full
特点:
不依赖外部接口,无需网络连接
仅支持英文文本识别(中文会显示乱码)
硬件要求不高,处理速度极快
识别质量与Workers AI相当
数据本地处理,防止隐私泄露