fucai-server/OCR_API_DOCUMENT.md

12 KiB
Raw Blame History

OCR 识别接口 API 文档

接口概述

结婚证OCR识别接口集成百度AI的结婚证识别能力,返回结婚证上所有主要信息的识别结果,包括识别准确度(probability)和位置信息(location)。


接口端点

1. 图片上传接口

URL: /marriage/ocr/upload

方法: POST

请求参数:

参数名 类型 必填 说明
file MultipartFile 结婚证图片文件

请求示例:

curl -X POST \
  -F "file=@marriage_certificate.jpg" \
  http://localhost:8080/marriage/ocr/upload

响应示例:

{
    "code": 200,
    "msg": "success",
    "data": {
        "uploadId": "a1b2c3d4e5f6g7h8"
    }
}

响应参数:

参数名 类型 说明
uploadId String 上传文件标识,后续parse接口需要使用

2. 图片解析接口

URL: /marriage/ocr/parse

方法: POST

Content-Type: application/json

请求参数:

参数名 类型 必填 说明
mobile String 手机号
smsCode String 验证码
uploadId String 上传文件标识,来自upload接口

请求示例:

{
    "mobile": "18888888888",
    "smsCode": "123456",
    "uploadId": "a1b2c3d4e5f6g7h8"
}

响应示例:

{
    "code": 200,
    "msg": "success",
    "data": {
        "raw": "{\"words_result_num\": 14, \"words_result\": {...}}",
        "words": ["王连杰", "320321199504011218", "1995年04月01日", ...],
        "parsed": {
            "husbandName": "王连杰",
            "husbandId": "320321199504011218",
            "husbandBirthDate": "1995-04-01",
            "husbandNationality": "中国",
            "husbandGender": "男",
            "wifeName": "张丹",
            "wifeId": "320321199406197047",
            "wifeBirthDate": "1994-06-19",
            "wifeNationality": "中国",
            "wifeGender": "女",
            "marriageNo": "320321201700004108",
            "certificateHolder": "王连杰",
            "registerDate": "2017-04-01"
        },
        "parsed_detailed": {
            "husbandName": {
                "word": "王连杰",
                "probability": {
                    "average": 20.68798065,
                    "min": 0.9106679559
                },
                "location": {
                    "width": 109,
                    "height": 47,
                    "top": 933,
                    "left": 253
                }
            },
            "wifeName": {
                "word": "张丹",
                "probability": {
                    "average": 19.14912224,
                    "min": 0.8554975986
                },
                "location": {
                    "width": 83,
                    "height": 43,
                    "top": 1204,
                    "left": 239
                }
            },
            "husbandId": {
                "word": "320321199504011218",
                "probability": {
                    "average": 13.2870388,
                    "min": 0.5172381401
                },
                "location": {
                    "width": 341,
                    "height": 68,
                    "top": 1081,
                    "left": 343
                }
            },
            "wifeId": {
                "word": "320321199406197047",
                "probability": {
                    "average": 15.98988342,
                    "min": 0.6194867492
                },
                "location": {
                    "width": 336,
                    "height": 56,
                    "top": 1351,
                    "left": 326
                }
            },
            "husbandBirthDate": {
                "word": "1995-04-01",
                "probability": {
                    "average": 20.66628647,
                    "min": 0.7240950465
                },
                "location": {
                    "width": 250,
                    "height": 55,
                    "top": 1044,
                    "left": 857
                }
            },
            "wifeBirthDate": {
                "word": "1994-06-19",
                "probability": {
                    "average": 25.62935066,
                    "min": 0.9226108789
                },
                "location": {
                    "width": 255,
                    "height": 56,
                    "top": 1322,
                    "left": 829
                }
            },
            "husbandNationality": {
                "word": "中国",
                "probability": {
                    "average": 17.19703484,
                    "min": 0.7144192457
                },
                "location": {
                    "width": 79,
                    "height": 43,
                    "top": 1011,
                    "left": 249
                }
            },
            "wifeNationality": {
                "word": "中国",
                "probability": {
                    "average": 23.41218376,
                    "min": 0.9498358369
                },
                "location": {
                    "width": 79,
                    "height": 46,
                    "top": 1264,
                    "left": 242
                }
            },
            "husbandGender": {
                "word": "男",
                "probability": {
                    "average": 24.97878838,
                    "min": 0.9302400351
                },
                "location": {
                    "width": 39,
                    "height": 40,
                    "top": 973,
                    "left": 792
                }
            },
            "wifeGender": {
                "word": "女",
                "probability": {
                    "average": 21.57674408,
                    "min": 0.8877936602
                },
                "location": {
                    "width": 42,
                    "height": 42,
                    "top": 1243,
                    "left": 765
                }
            },
            "marriageNo": {
                "word": "320321201700004108",
                "probability": {
                    "average": 16.35309982,
                    "min": 0.6457977891
                },
                "location": {
                    "width": 363,
                    "height": 44,
                    "top": 650,
                    "left": 272
                }
            },
            "certificateHolder": {
                "word": "王连杰",
                "probability": {
                    "average": 16.20750237,
                    "min": 0.6932016015
                },
                "location": {
                    "width": 119,
                    "height": 44,
                    "top": 362,
                    "left": 271
                }
            },
            "registerDate": {
                "word": "2017-04-01",
                "probability": {
                    "average": 19.06731987,
                    "min": 0.7248777151
                },
                "location": {
                    "width": 354,
                    "height": 42,
                    "top": 511,
                    "left": 272
                }
            }
        }
    }
}

响应数据说明

顶级字段

字段名 类型 说明
code Integer 错误码,200表示成功
msg String 错误消息或"success"
data Object 响应数据体

data 字段说明

字段名 类型 说明
raw String 百度OCR API原始返回结果(JSON字符串)
words Array 识别出的所有文本内容(数组格式)
parsed Object 简化格式的解析结果(Map<String, String>),用于向后兼容
parsed_detailed Object 新增 详细格式的解析结果(Map<String, OcrFieldData>),包含probability和location

OcrFieldData 结构

{
    "word": "识别的文本内容",
    "probability": {
        "average": 识别准确度平均值,
        "min": 识别准确度最小值
    },
    "location": {
        "width": 文本框宽度,
        "height": 文本框高度,
        "top": 距离图片顶部的像素数,
        "left": 距离图片左侧的像素数
    }
}

支持的字段列表

字段键 说明
husbandName 男方姓名
husbandId 男方身份证号
husbandBirthDate 男方出生日期
husbandNationality 男方国籍
husbandGender 男方性别
wifeName 女方姓名
wifeId 女方身份证号
wifeBirthDate 女方出生日期
wifeNationality 女方国籍
wifeGender 女方性别
marriageNo 结婚证字号
certificateHolder 持证人
registerDate 登记日期
remark 备注(可选)

错误处理

常见错误响应

验证码错误:

{
    "code": 400,
    "msg": "验证码错误,请重新输入!",
    "data": null
}

文件已过期:

{
    "code": 400,
    "msg": "上传文件不存在或已过期,请重新上传!",
    "data": null
}

配置未设置:

{
    "code": 400,
    "msg": "百度OCR配置未设置请联系管理员",
    "data": null
}

识别失败:

{
    "code": 400,
    "msg": "识别失败,请稍后再试!",
    "data": null
}

使用场景

场景1: 获取识别结果的准确度信息

// 前端代码示例
const response = await fetch('/marriage/ocr/parse', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
        mobile: '18888888888',
        smsCode: '123456',
        uploadId: 'xxxxx'
    })
});

const result = await response.json();
const wifeNameData = result.data.parsed_detailed.wifeName;

console.log('女方姓名:', wifeNameData.word);
console.log('平均准确度:', wifeNameData.probability.average + '%');
console.log('最小准确度:', wifeNameData.probability.min);

// 根据准确度判断是否需要人工审核
if (wifeNameData.probability.average < 50) {
    alert('识别准确度较低,请人工审核');
}

场景2: 在图片上标注识别结果位置

// 使用location信息在图片上绘制识别结果的位置框
const location = result.data.parsed_detailed.husbandId.location;

canvas.drawRect({
    x: location.left,
    y: location.top,
    width: location.width,
    height: location.height,
    strokeStyle: 'red',
    lineWidth: 2
});

场景3: 向后兼容 - 获取简化结果

// 仍然可以使用 parsed 字段获取简化的文本结果
const simpleParsed = result.data.parsed;
console.log('男方姓名:', simpleParsed.husbandName);
console.log('女方姓名:', simpleParsed.wifeName);
console.log('结婚证号:', simpleParsed.marriageNo);
console.log('登记日期:', simpleParsed.registerDate);

重要说明

  1. 概率值范围: 0-100,越高表示识别准确度越高
  2. 位置坐标: 以图片左上角为原点 (0, 0),单位为像素
  3. 日期格式: 自动转换为 "YYYY-MM-DD" 格式
  4. 证号格式: 仅保留数字部分
  5. 文件有效期: 上传后10分钟内有效
  6. 向后兼容: 原有的 parsed 字段保持不变,可继续使用

集成建议

  1. 可信度检查: 使用 probability.average 判断识别质量,建议设置 60% 以上为合格
  2. 异常处理: 对于低置信度的字段,建议提示用户进行人工审核或重新上传
  3. 性能优化: parsed_detailed 字段包含完整信息,客户端可按需使用
  4. 安全性: 敏感信息(如身份证号)应在服务端进行加密处理
  5. 日志记录: 建议记录识别结果和概率值,用于后续模型优化和问题追溯