{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "authorship_tag": "ABX9TyNzM7ZYYooQ/JA3K9asYYNZ", "include_colab_link": true }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "view-in-github", "colab_type": "text" }, "source": [ "\"Open" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "nIM7nIARbLDf", "outputId": "5531638a-dbfa-4c38-9aa7-77d62c8dc52f" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n" ] } ], "source": [ "from google.colab import drive\n", "import os\n", "\n", "drive.mount('/content/drive')" ] }, { "cell_type": "code", "source": [ "!pip install google-cloud-documentai\n", "!pip install orjson\n", "!pip install google-cloud" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "a4Sk3h1yesv9", "outputId": "b4ffa9d4-e47a-4c08-98c2-27827d5e0ff4" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Requirement already satisfied: google-cloud-documentai in /usr/local/lib/python3.10/dist-packages (2.21.1)\n", "Requirement already satisfied: google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0 in /usr/local/lib/python3.10/dist-packages (from google-cloud-documentai) (2.11.1)\n", "Requirement already satisfied: proto-plus<2.0.0dev,>=1.22.3 in /usr/local/lib/python3.10/dist-packages (from google-cloud-documentai) (1.23.0)\n", "Requirement already satisfied: protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.19.5 in /usr/local/lib/python3.10/dist-packages (from google-cloud-documentai) (3.20.3)\n", "Requirement already satisfied: googleapis-common-protos<2.0.dev0,>=1.56.2 in /usr/local/lib/python3.10/dist-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (1.62.0)\n", "Requirement already satisfied: google-auth<3.0.dev0,>=2.14.1 in /usr/local/lib/python3.10/dist-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (2.17.3)\n", "Requirement already satisfied: requests<3.0.0.dev0,>=2.18.0 in /usr/local/lib/python3.10/dist-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (2.31.0)\n", "Requirement already satisfied: grpcio<2.0dev,>=1.33.2 in /usr/local/lib/python3.10/dist-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (1.60.0)\n", "Requirement already satisfied: grpcio-status<2.0.dev0,>=1.33.2 in /usr/local/lib/python3.10/dist-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (1.48.2)\n", "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from google-auth<3.0.dev0,>=2.14.1->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (5.3.2)\n", "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from google-auth<3.0.dev0,>=2.14.1->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (0.3.0)\n", "Requirement already satisfied: six>=1.9.0 in /usr/local/lib/python3.10/dist-packages (from google-auth<3.0.dev0,>=2.14.1->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (1.16.0)\n", "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.10/dist-packages (from google-auth<3.0.dev0,>=2.14.1->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (4.9)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0.dev0,>=2.18.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (3.3.2)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0.dev0,>=2.18.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (3.6)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0.dev0,>=2.18.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (2.0.7)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0.dev0,>=2.18.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (2023.11.17)\n", "Requirement already satisfied: pyasn1<0.6.0,>=0.4.6 in /usr/local/lib/python3.10/dist-packages (from pyasn1-modules>=0.2.1->google-auth<3.0.dev0,>=2.14.1->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (0.5.1)\n", "Requirement already satisfied: orjson in /usr/local/lib/python3.10/dist-packages (3.9.12)\n", "Collecting google-cloud\n", " Downloading google_cloud-0.34.0-py2.py3-none-any.whl (1.8 kB)\n", "Installing collected packages: google-cloud\n", "Successfully installed google-cloud-0.34.0\n" ] } ] }, { "cell_type": "code", "source": [ "os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = f'/content/drive/MyDrive/docai/credential.json'" ], "metadata": { "id": "ogEv8AoDfmuJ" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "import base64\n", "from google.cloud import documentai\n", "from google.api_core.client_options import ClientOptions\n", "\n", "\n", "opts = ClientOptions(api_endpoint=\"us-documentai.googleapis.com\")\n", "\n", "client = documentai.DocumentProcessorServiceClient(client_options=opts)\n", "\n", "name = client.processor_version_path('760497433370', 'us', '615d49163b214b2', 'pretrained-foundation-model-v1.0-2023-08-22')\n", "\n", "with open('/content/drive/MyDrive/ocr/dataset/ktp5.png', 'rb') as image_file:\n", " fileStream = image_file.read()\n", "\n", "raw_document = documentai.RawDocument(content=fileStream, mime_type='image/png')\n", "\n", "request = documentai.ProcessRequest(\n", " name=name,\n", " raw_document=raw_document,\n", " field_mask='entities',\n", " process_options=None\n", ")\n", "\n", "result = client.process_document(request=request)\n", "\n", "document = result.document\n", "\n", "ktp = dict()\n", "\n", "for entity in document.entities:\n", " ktp[entity.type_] = entity.mention_text\n", "\n", "print(ktp)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "FNmlKNSbfzEt", "outputId": "9c36690d-876e-46b2-af3b-612d4f898fa8" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "{'birth_place': 'NIAS,', 'nik': '1204050503670001', 'name': 'EDO FURNAMA', 'city': 'KABUPATEN NIAS', 'birth_date': '05-03-1967', 'province': 'PROVINSI SUMATERA UTARA'}\n" ] } ] } ] }