Skip to content

Instantly share code, notes, and snippets.

@masitings
Created January 31, 2024 18:41
Show Gist options
  • Save masitings/10a164a7799b8547ca018728b00c59fc to your computer and use it in GitHub Desktop.
Save masitings/10a164a7799b8547ca018728b00c59fc to your computer and use it in GitHub Desktop.

Revisions

  1. masitings revised this gist Jan 31, 2024. 1 changed file with 12 additions and 1 deletion.
    13 changes: 12 additions & 1 deletion document_ai_ocr_ktp.ipynb
    Original file line number Diff line number Diff line change
    @@ -4,7 +4,8 @@
    "metadata": {
    "colab": {
    "provenance": [],
    "authorship_tag": "ABX9TyNzM7ZYYooQ/JA3K9asYYNZ"
    "authorship_tag": "ABX9TyNzM7ZYYooQ/JA3K9asYYNZ",
    "include_colab_link": true
    },
    "kernelspec": {
    "name": "python3",
    @@ -15,6 +16,16 @@
    }
    },
    "cells": [
    {
    "cell_type": "markdown",
    "metadata": {
    "id": "view-in-github",
    "colab_type": "text"
    },
    "source": [
    "<a href=\"https://colab.research.google.com/gist/masitings/10a164a7799b8547ca018728b00c59fc/document_ai_ocr_ktp.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": null,
  2. masitings created this gist Jan 31, 2024.
    158 changes: 158 additions & 0 deletions document_ai_ocr_ktp.ipynb
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,158 @@
    {
    "nbformat": 4,
    "nbformat_minor": 0,
    "metadata": {
    "colab": {
    "provenance": [],
    "authorship_tag": "ABX9TyNzM7ZYYooQ/JA3K9asYYNZ"
    },
    "kernelspec": {
    "name": "python3",
    "display_name": "Python 3"
    },
    "language_info": {
    "name": "python"
    }
    },
    "cells": [
    {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
    "colab": {
    "base_uri": "https://localhost:8080/"
    },
    "id": "nIM7nIARbLDf",
    "outputId": "5531638a-dbfa-4c38-9aa7-77d62c8dc52f"
    },
    "outputs": [
    {
    "output_type": "stream",
    "name": "stdout",
    "text": [
    "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"
    ]
    }
    ],
    "source": [
    "import os\n",
    "\n",
    "from google.colab import drive\n",
    "\n",
    "# Mount Google Drive at /content/drive; the later cells read their input files from beneath this path.\n",
    "drive.mount('/content/drive')"
    ]
    },
    {
    "cell_type": "code",
    "source": [
    "# %pip (rather than !pip) installs into the environment of the running kernel.\n",
    "# Versions are bounded to the release lines this notebook was last run with (2.21.1 and 3.9.12).\n",
    "# The deprecated `google-cloud` placeholder package is intentionally not installed:\n",
    "# `from google.cloud import documentai` is provided by google-cloud-documentai itself.\n",
    "%pip install -q google-cloud-documentai~=2.21 orjson~=3.9"
    ],
    "metadata": {
    "colab": {
    "base_uri": "https://localhost:8080/"
    },
    "id": "a4Sk3h1yesv9",
    "outputId": "b4ffa9d4-e47a-4c08-98c2-27827d5e0ff4"
    },
    "execution_count": null,
    "outputs": [
    {
    "output_type": "stream",
    "name": "stdout",
    "text": [
    "Requirement already satisfied: google-cloud-documentai in /usr/local/lib/python3.10/dist-packages (2.21.1)\n",
    "Requirement already satisfied: google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0 in /usr/local/lib/python3.10/dist-packages (from google-cloud-documentai) (2.11.1)\n",
    "Requirement already satisfied: proto-plus<2.0.0dev,>=1.22.3 in /usr/local/lib/python3.10/dist-packages (from google-cloud-documentai) (1.23.0)\n",
    "Requirement already satisfied: protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.19.5 in /usr/local/lib/python3.10/dist-packages (from google-cloud-documentai) (3.20.3)\n",
    "Requirement already satisfied: googleapis-common-protos<2.0.dev0,>=1.56.2 in /usr/local/lib/python3.10/dist-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (1.62.0)\n",
    "Requirement already satisfied: google-auth<3.0.dev0,>=2.14.1 in /usr/local/lib/python3.10/dist-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (2.17.3)\n",
    "Requirement already satisfied: requests<3.0.0.dev0,>=2.18.0 in /usr/local/lib/python3.10/dist-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (2.31.0)\n",
    "Requirement already satisfied: grpcio<2.0dev,>=1.33.2 in /usr/local/lib/python3.10/dist-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (1.60.0)\n",
    "Requirement already satisfied: grpcio-status<2.0.dev0,>=1.33.2 in /usr/local/lib/python3.10/dist-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (1.48.2)\n",
    "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from google-auth<3.0.dev0,>=2.14.1->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (5.3.2)\n",
    "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from google-auth<3.0.dev0,>=2.14.1->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (0.3.0)\n",
    "Requirement already satisfied: six>=1.9.0 in /usr/local/lib/python3.10/dist-packages (from google-auth<3.0.dev0,>=2.14.1->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (1.16.0)\n",
    "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.10/dist-packages (from google-auth<3.0.dev0,>=2.14.1->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (4.9)\n",
    "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0.dev0,>=2.18.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (3.3.2)\n",
    "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0.dev0,>=2.18.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (3.6)\n",
    "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0.dev0,>=2.18.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (2.0.7)\n",
    "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0.dev0,>=2.18.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (2023.11.17)\n",
    "Requirement already satisfied: pyasn1<0.6.0,>=0.4.6 in /usr/local/lib/python3.10/dist-packages (from pyasn1-modules>=0.2.1->google-auth<3.0.dev0,>=2.14.1->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (0.5.1)\n",
    "Requirement already satisfied: orjson in /usr/local/lib/python3.10/dist-packages (3.9.12)\n",
    "Collecting google-cloud\n",
    " Downloading google_cloud-0.34.0-py2.py3-none-any.whl (1.8 kB)\n",
    "Installing collected packages: google-cloud\n",
    "Successfully installed google-cloud-0.34.0\n"
    ]
    }
    ]
    },
    {
    "cell_type": "code",
    "source": [
    "# Location of the credentials JSON on the mounted Drive (requires the drive.mount cell to have run).\n",
    "CREDENTIALS_PATH = '/content/drive/MyDrive/docai/credential.json'\n",
    "\n",
    "# Fail here with a clear message instead of later, inside the API client, if the file is missing.\n",
    "if not os.path.isfile(CREDENTIALS_PATH):\n",
    "    raise FileNotFoundError(f'Credentials file not found: {CREDENTIALS_PATH}')\n",
    "\n",
    "# The Google client libraries look up this environment variable to find the credentials file.\n",
    "os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = CREDENTIALS_PATH"
    ],
    "metadata": {
    "id": "ogEv8AoDfmuJ"
    },
    "execution_count": null,
    "outputs": []
    },
    {
    "cell_type": "code",
    "source": [
    "import base64\n",
    "from google.cloud import documentai\n",
    "from google.api_core.client_options import ClientOptions\n",
    "\n",
    "# Everything a reader may need to change is collected here instead of being scattered through the cell.\n",
    "PROJECT_ID = '760497433370'\n",
    "LOCATION = 'us'\n",
    "PROCESSOR_ID = '615d49163b214b2'\n",
    "PROCESSOR_VERSION = 'pretrained-foundation-model-v1.0-2023-08-22'\n",
    "IMAGE_PATH = '/content/drive/MyDrive/ocr/dataset/ktp5.png'\n",
    "IMAGE_MIME_TYPE = 'image/png'\n",
    "\n",
    "# The regional endpoint is built from LOCATION so the endpoint and the processor location cannot drift apart.\n",
    "opts = ClientOptions(api_endpoint=f'{LOCATION}-documentai.googleapis.com')\n",
    "client = documentai.DocumentProcessorServiceClient(client_options=opts)\n",
    "\n",
    "# Resource name of the processor version that will handle the request.\n",
    "name = client.processor_version_path(PROJECT_ID, LOCATION, PROCESSOR_ID, PROCESSOR_VERSION)\n",
    "\n",
    "# Read the image as raw bytes; RawDocument takes the bytes together with their MIME type.\n",
    "with open(IMAGE_PATH, 'rb') as image_file:\n",
    "    fileStream = image_file.read()\n",
    "\n",
    "raw_document = documentai.RawDocument(content=fileStream, mime_type=IMAGE_MIME_TYPE)\n",
    "\n",
    "# field_mask limits the returned document to its `entities` field, the only part used below.\n",
    "request = documentai.ProcessRequest(\n",
    "    name=name,\n",
    "    raw_document=raw_document,\n",
    "    field_mask='entities',\n",
    ")\n",
    "\n",
    "result = client.process_document(request=request)\n",
    "document = result.document\n",
    "\n",
    "# Map each entity type to its recognised text; if a type occurs more than once, the last occurrence wins.\n",
    "ktp = {entity.type_: entity.mention_text for entity in document.entities}\n",
    "\n",
    "# NOTE: the printed fields are personal data (e.g. nik, name, birth_date) - clear this cell's output before sharing.\n",
    "print(ktp)"
    ],
    "metadata": {
    "colab": {
    "base_uri": "https://localhost:8080/"
    },
    "id": "FNmlKNSbfzEt",
    "outputId": "9c36690d-876e-46b2-af3b-612d4f898fa8"
    },
    "execution_count": null,
    "outputs": [
    {
    "output_type": "stream",
    "name": "stdout",
    "text": [
    "{'birth_place': 'NIAS,', 'nik': '1204050503670001', 'name': 'EDO FURNAMA', 'city': 'KABUPATEN NIAS', 'birth_date': '05-03-1967', 'province': 'PROVINSI SUMATERA UTARA'}\n"
    ]
    }
    ]
    }
    ]
    }