{"service":"Sunamura OCR API","version":"0.3.1","started_at":"2026-04-09 17:26:53 +0900","engines":{"tesseract":{"available":true,"mode":"in_process","executable":"/usr/bin/tesseract","supports":["text","tsv"]},"ndlocr_lite":{"available":true,"mode":"subprocess","command":["/opt/OCR-API/.venv-ndlocr-lite/bin/ndlocr-lite"],"executable":"/opt/OCR-API/.venv-ndlocr-lite/bin/ndlocr-lite","supports":["text"],"requires":{"python":">=3.10","runtime":"onnxruntime"},"runtime_config":{"contract":{"NDLOCR_LITE_CMD":{"required":true,"default":"ndlocr-lite","example":"/opt/OCR-API/.venv-ndlocr-lite/bin/ndlocr-lite","description":"NDLOCR-Lite を起動するコマンド。Python スクリプト直呼びの複合コマンドも可。"},"NDLOCR_LITE_DEVICE":{"required":false,"default":"cpu","allowed":["cpu","cuda"],"description":"NDLOCR-Lite の実行デバイス。"},"NDLOCR_LITE_VIZ":{"required":false,"default":"0","allowed":["0","1"],"description":"1 のとき可視化画像を出力する。"},"NDLOCR_LITE_TIMEOUT_SEC":{"required":false,"default":"180","description":"NDLOCR-Lite 実行のタイムアウト秒数。"},"NDLOCR_LITE_TMP_ROOT":{"required":false,"default":null,"example":"/tmp/ocr","description":"NDLOCR-Lite の一時ディレクトリ親。未指定時は OS の標準 tmp を使う。"}},"current":{"NDLOCR_LITE_CMD":"/opt/OCR-API/.venv-ndlocr-lite/bin/ndlocr-lite","NDLOCR_LITE_DEVICE":"cpu","NDLOCR_LITE_VIZ":null,"NDLOCR_LITE_TIMEOUT_SEC":null,"NDLOCR_LITE_TMP_ROOT":"/tmp/ocr"},"resolved":{"command":["/opt/OCR-API/.venv-ndlocr-lite/bin/ndlocr-lite"],"device":"cpu","viz":false,"timeout_sec":180,"tmp_root":"/tmp/ocr","executable":"/opt/OCR-API/.venv-ndlocr-lite/bin/ndlocr-lite"},"validation":{"command_resolves":true,"device_valid":true,"viz_valid":true,"timeout_valid":true}}}},"presets":{"_comment":"Sunamura OCR Preprocess Presets (2025 initial 
template)","invoice":{"_desc":"税理士系・綺麗な請求書向け。線が太く、文字が比較的クッキリしているPDF。","blur":2,"blur_mode":"gaussian","threshold_algo":"sauvola","threshold_mode":"none","use_clahe":true,"enhance_sharp":1.5},"legacy_invoice":{"_desc":"大谷商事系・薄い/FAX混じり系の古い帳票向け。背景ムラ大きい・文字細い・にじみ対策。","blur":2,"blur_mode":"median","threshold_algo":"adaptive","threshold_mode":"adaptive(21,5)","use_clahe":true,"enhance_sharp":0.0},"retail_receipt_mixed_layout":{"_desc":"一般小売の領収書/売上票向け。縦書き横書き混在・背景ムラ対策。CLAHE + adaptive(15,3)。","blur":2,"blur_mode":"gaussian","threshold_algo":"adaptive","threshold_mode":"adaptive(15,3)","use_clahe":true,"enhance_sharp":0.0},"maruni_invoice":{"_desc":"丸二物産請求書専用。シワ・ゴミ対策でノイズを落とし、しきい値はOtsu。","blur":3,"blur_mode":"median","threshold_algo":"otsu","threshold_mode":"none","use_clahe":false,"enhance_sharp":0.0},"shimizu_invoice":{"_desc":"清水機材商事請求書向け。黄緑の罫線/ヘッダを局所的に暗くして白文字を浮かせる。","blur":2,"blur_mode":"gaussian","threshold_algo":"adaptive","threshold_mode":"adaptive(15,3)","use_clahe":false,"enhance_sharp":0.0,"green_region_mode":"darken","green_region_scale":0.5,"noise_open":true,"noise_open_size":3,"noise_open_shape":"cross","noise_open_stage":"pre"},"fax_scan":{"_desc":"FAX受信スキャン向け。低コントラスト・筋ノイズ対策にCLAHE + adaptive。","blur":1,"blur_mode":"gaussian","threshold_algo":"adaptive","threshold_mode":"adaptive(19,4)","use_clahe":true,"enhance_sharp":0.0},"fax_scan_strong":{"_desc":"FAX受信スキャン向け（強め）。ノイズ除去強化 + 
adaptive(31,5)。","blur":3,"blur_mode":"median","threshold_algo":"adaptive","threshold_mode":"adaptive(31,5)","use_clahe":true,"enhance_sharp":0.0},"ocr_heavy_noise":{"_desc":"ノイズが多い写真ベースのPDF・スマホ撮影などの雑音が多い場合","blur":3,"blur_mode":"median","threshold_algo":"otsu","threshold_mode":"none","use_clahe":true,"enhance_sharp":1.2},"ocr_dense_text":{"_desc":"小さい文字が密集した技術文書・契約書向け。細文字強調、過剰シャープ抑制。","blur":1,"blur_mode":"gaussian","threshold_algo":"adaptive","threshold_mode":"adaptive(19,4)","use_clahe":true,"enhance_sharp":0.6},"debug_raw":{"_desc":"前処理ほぼ無効。デバッグ用途。差分チェック用。","blur":0,"blur_mode":"gaussian","threshold_algo":"adaptive","threshold_mode":"adaptive(3,1)","use_clahe":false,"enhance_sharp":0.0},"debug_strong_clahe":{"blur":0,"blur_mode":"gaussian","threshold_algo":"adaptive","threshold_mode":"adaptive(21,5)","use_clahe":true,"enhance_sharp":false}},"preset_usage":{"file":"presets.json","hot_reload":true,"description":"presets.json にプリセットを JSON で記述し、リクエスト時に form の preset=<name> で指定するとその設定で前処理します。preset が無効/未指定の場合はフォームの値が使われます。","example":{"invoice":{"blur":2,"blur_mode":"gaussian","threshold_algo":"sauvola","threshold_mode":"none","use_clahe":true,"enhance_sharp":1.5}},"env_override":"PRESET_FILE で presets.json の場所を変更できます"},"error_response":{"shape":{"status":"error","engine":"tesseract | ndlocr_lite | null","error":{"code":"stable error code","message":"human readable summary","stage":"request | fetch | render | ocr","retryable":true}},"codes":[{"code":"invalid_api_key","stage":"request","retryable":false,"description":"x-api-key が不正"},{"code":"invalid_engine","stage":"request","retryable":false,"description":"engine が未対応"},{"code":"invalid_psm","stage":"request","retryable":false,"description":"psm が未対応"},{"code":"invalid_region_mode","stage":"request","retryable":false,"description":"region_mode が未対応"},{"code":"unsupported_ret","stage":"request","retryable":false,"description":"engine に対して ret 
が未対応"},{"code":"unsupported_lang","stage":"request","retryable":false,"description":"engine に対して lang が未対応"},{"code":"remote_pdf_fetch_failed","stage":"fetch","retryable":true,"description":"URL からの PDF 取得に失敗"},{"code":"pdf_render_failed","stage":"render","retryable":true,"description":"PDF の画像化に失敗"},{"code":"engine_execution_failed","stage":"ocr","retryable":true,"description":"OCR エンジン実行に失敗"},{"code":"page_ocr_failed","stage":"ocr","retryable":true,"description":"ページ単位の OCR に失敗"}]},"endpoints":[{"method":"POST","path":"/api/ocr/pdf","auth":"Header: x-api-key","description":"PDF を OCR しテキストまたは TSV を返す","form_params":{"file":"PDF (multipart/form-data)","engine":"tesseract | ndlocr_lite","region_mode":"full (split reserved for future use)","dpi":"OCR 用 DPI（推奨 300）","psm":"Tesseract Page Segmentation Mode（推奨 6）","lang":"OCR 言語（例: jpn+eng）","ret":"text / tsv","preset":"preset 名（presets.json を参照）","blur":"平滑化 0=無効","blur_mode":"gaussian|median|bilateral","threshold_mode":"adaptive(15,3) のように指定","threshold_algo":"adaptive|otsu|sauvola|niblack","use_clahe":"コントラスト補正 true/false","enhance_sharp":"Unsharp Mask による輪郭強調","green_region_mode":"none|darken (黄緑領域の補正)","green_region_scale":"darken 時の係数 (例: 0.6)","noise_open":"小さな点ノイズ除去 true/false","noise_open_size":"カーネルサイズ (例: 3)","noise_open_shape":"cross|ellipse|rect","noise_open_stage":"pre|post"},"example_curl":"curl -X POST https://HOST/api/ocr/pdf -H 'x-api-key: KEY' -F 'file=@invoice.pdf' -F 'dpi=600' -F 'psm=6' -F 'preset=invoice'"},{"method":"POST","path":"/api/ocr/debug_preprocess","auth":"Header: x-api-key","description":"前処理結果の PNG 画像を返す"},{"method":"GET","path":"/api/ocr/engines","auth":"none"},{"method":"GET","path":"/healthz","auth":"none"},{"method":"GET","path":"/docs","auth":"none"},{"method":"GET","path":"/openapi.json","auth":"none"}]}