Created
January 31, 2024 18:41
-
-
Save masitings/10a164a7799b8547ca018728b00c59fc to your computer and use it in GitHub Desktop.
Revisions
-
masitings revised this gist
Jan 31, 2024 . 1 changed file with 12 additions and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -4,7 +4,8 @@ "metadata": { "colab": { "provenance": [], "authorship_tag": "ABX9TyNzM7ZYYooQ/JA3K9asYYNZ", "include_colab_link": true }, "kernelspec": { "name": "python3", @@ -15,6 +16,16 @@ } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "view-in-github", "colab_type": "text" }, "source": [ "<a href=\"https://colab.research.google.com/gist/masitings/10a164a7799b8547ca018728b00c59fc/document_ai_ocr_ktp.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" ] }, { "cell_type": "code", "execution_count": null, -
masitings created this gist
Jan 31, 2024 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,158 @@ { "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "authorship_tag": "ABX9TyNzM7ZYYooQ/JA3K9asYYNZ" }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "nIM7nIARbLDf", "outputId": "5531638a-dbfa-4c38-9aa7-77d62c8dc52f" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n" ] } ], "source": [ "from google.colab import drive\n", "import os\n", "\n", "drive.mount('/content/drive')" ] }, { "cell_type": "code", "source": [ "!pip install google-cloud-documentai\n", "!pip install orjson\n", "!pip install google-cloud" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "a4Sk3h1yesv9", "outputId": "b4ffa9d4-e47a-4c08-98c2-27827d5e0ff4" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Requirement already satisfied: google-cloud-documentai in /usr/local/lib/python3.10/dist-packages (2.21.1)\n", "Requirement already satisfied: google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0 in /usr/local/lib/python3.10/dist-packages (from google-cloud-documentai) (2.11.1)\n", "Requirement already satisfied: proto-plus<2.0.0dev,>=1.22.3 in /usr/local/lib/python3.10/dist-packages (from google-cloud-documentai) (1.23.0)\n", "Requirement already satisfied: protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.19.5 in /usr/local/lib/python3.10/dist-packages (from google-cloud-documentai) (3.20.3)\n", "Requirement already satisfied: googleapis-common-protos<2.0.dev0,>=1.56.2 in /usr/local/lib/python3.10/dist-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (1.62.0)\n", "Requirement already satisfied: google-auth<3.0.dev0,>=2.14.1 in /usr/local/lib/python3.10/dist-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (2.17.3)\n", "Requirement already satisfied: requests<3.0.0.dev0,>=2.18.0 in /usr/local/lib/python3.10/dist-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (2.31.0)\n", "Requirement already satisfied: grpcio<2.0dev,>=1.33.2 in /usr/local/lib/python3.10/dist-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (1.60.0)\n", "Requirement already satisfied: grpcio-status<2.0.dev0,>=1.33.2 in /usr/local/lib/python3.10/dist-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (1.48.2)\n", "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from google-auth<3.0.dev0,>=2.14.1->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (5.3.2)\n", "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from google-auth<3.0.dev0,>=2.14.1->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (0.3.0)\n", "Requirement already satisfied: six>=1.9.0 in /usr/local/lib/python3.10/dist-packages (from google-auth<3.0.dev0,>=2.14.1->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (1.16.0)\n", "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.10/dist-packages (from google-auth<3.0.dev0,>=2.14.1->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (4.9)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0.dev0,>=2.18.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (3.3.2)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0.dev0,>=2.18.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (3.6)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0.dev0,>=2.18.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (2.0.7)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0.dev0,>=2.18.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (2023.11.17)\n", "Requirement already satisfied: pyasn1<0.6.0,>=0.4.6 in /usr/local/lib/python3.10/dist-packages (from pyasn1-modules>=0.2.1->google-auth<3.0.dev0,>=2.14.1->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (0.5.1)\n", "Requirement already satisfied: orjson in /usr/local/lib/python3.10/dist-packages (3.9.12)\n", "Collecting google-cloud\n", " Downloading google_cloud-0.34.0-py2.py3-none-any.whl (1.8 kB)\n", "Installing collected packages: google-cloud\n", "Successfully installed google-cloud-0.34.0\n" ] } ] }, { "cell_type": "code", "source": [ "os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = f'/content/drive/MyDrive/docai/credential.json'" ], "metadata": { "id": "ogEv8AoDfmuJ" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "import base64\n", "from google.cloud import documentai\n", "from google.api_core.client_options import ClientOptions\n", "\n", "\n", "opts = ClientOptions(api_endpoint=\"us-documentai.googleapis.com\")\n", "\n", "client = documentai.DocumentProcessorServiceClient(client_options=opts)\n", "\n", "name = client.processor_version_path('760497433370', 'us', '615d49163b214b2', 'pretrained-foundation-model-v1.0-2023-08-22')\n", "\n", "with open('/content/drive/MyDrive/ocr/dataset/ktp5.png', 'rb') as image_file:\n", " fileStream = image_file.read()\n", "\n", "raw_document = documentai.RawDocument(content=fileStream, mime_type='image/png')\n", "\n", "request = documentai.ProcessRequest(\n", " name=name,\n", " raw_document=raw_document,\n", " field_mask='entities',\n", " process_options=None\n", ")\n", "\n", "result = client.process_document(request=request)\n", "\n", "document = result.document\n", "\n", "ktp = dict()\n", "\n", "for entity in document.entities:\n", " ktp[entity.type_] = entity.mention_text\n", "\n", "print(ktp)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "FNmlKNSbfzEt", "outputId": "9c36690d-876e-46b2-af3b-612d4f898fa8" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "{'birth_place': 'NIAS,', 'nik': '1204050503670001', 'name': 'EDO FURNAMA', 'city': 'KABUPATEN NIAS', 'birth_date': '05-03-1967', 'province': 'PROVINSI SUMATERA UTARA'}\n" ] } ] } ] }