Skip to content

Instantly share code, notes, and snippets.

@dynamicguy
Forked from hxy9243/ocr.ipynb
Created May 26, 2022 04:57
Show Gist options
  • Save dynamicguy/6ea624d1deaffda9179646ca45a63718 to your computer and use it in GitHub Desktop.

Revisions

  1. @hxy9243 hxy9243 revised this gist Mar 12, 2021. 1 changed file with 12 additions and 194 deletions.
    206 changes: 12 additions & 194 deletions ocr.ipynb
    Original file line number | Diff line number | Diff line change
    @@ -3,7 +3,7 @@
    {
    "cell_type": "code",
    "execution_count": null,
    "id": "focal-alliance",
    "id": "bacterial-maintenance",
    "metadata": {},
    "outputs": [],
    "source": [
    @@ -14,7 +14,7 @@
    {
    "cell_type": "code",
    "execution_count": 6,
    "id": "acknowledged-sweet",
    "id": "specified-success",
    "metadata": {},
    "outputs": [],
    "source": [
    @@ -29,7 +29,7 @@
    {
    "cell_type": "code",
    "execution_count": 74,
    "id": "about-delay",
    "id": "necessary-tamil",
    "metadata": {},
    "outputs": [
    {
    @@ -66,7 +66,7 @@
    {
    "cell_type": "code",
    "execution_count": 8,
    "id": "imposed-screen",
    "id": "guilty-business",
    "metadata": {},
    "outputs": [
    {
    @@ -95,7 +95,7 @@
    {
    "cell_type": "code",
    "execution_count": 9,
    "id": "everyday-queen",
    "id": "critical-silence",
    "metadata": {},
    "outputs": [
    {
    @@ -122,7 +122,7 @@
    {
    "cell_type": "code",
    "execution_count": 11,
    "id": "bound-stake",
    "id": "minute-specific",
    "metadata": {},
    "outputs": [
    {
    @@ -150,7 +150,7 @@
    {
    "cell_type": "code",
    "execution_count": 13,
    "id": "statistical-newsletter",
    "id": "secondary-horse",
    "metadata": {},
    "outputs": [
    {
    @@ -183,7 +183,7 @@
    {
    "cell_type": "code",
    "execution_count": 103,
    "id": "activated-tuesday",
    "id": "naughty-dairy",
    "metadata": {},
    "outputs": [
    {
    @@ -240,7 +240,7 @@
    {
    "cell_type": "code",
    "execution_count": 104,
    "id": "diverse-volleyball",
    "id": "developmental-cisco",
    "metadata": {},
    "outputs": [
    {
    @@ -260,7 +260,7 @@
    {
    "cell_type": "code",
    "execution_count": 150,
    "id": "american-experiment",
    "id": "presidential-methodology",
    "metadata": {},
    "outputs": [
    {
    @@ -328,7 +328,7 @@
    {
    "cell_type": "code",
    "execution_count": 143,
    "id": "noted-console",
    "id": "ignored-vocabulary",
    "metadata": {},
    "outputs": [
    {
    @@ -384,7 +384,7 @@
    {
    "cell_type": "code",
    "execution_count": 172,
    "id": "temporal-colombia",
    "id": "governing-scotland",
    "metadata": {},
    "outputs": [
    {
    @@ -423,188 +423,6 @@
    "data = DataFrame(np.array(alldata).reshape(n_row, n_col))\n",
    "print(data)"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 173,
    "id": "eastern-assistant",
    "metadata": {},
    "outputs": [
    {
    "data": {
    "text/html": [
    "<div>\n",
    "<style scoped>\n",
    " .dataframe tbody tr th:only-of-type {\n",
    " vertical-align: middle;\n",
    " }\n",
    "\n",
    " .dataframe tbody tr th {\n",
    " vertical-align: top;\n",
    " }\n",
    "\n",
    " .dataframe thead th {\n",
    " text-align: right;\n",
    " }\n",
    "</style>\n",
    "<table border=\"1\" class=\"dataframe\">\n",
    " <thead>\n",
    " <tr style=\"text-align: right;\">\n",
    " <th></th>\n",
    " <th>0</th>\n",
    " <th>1</th>\n",
    " <th>2</th>\n",
    " <th>3</th>\n",
    " <th>4</th>\n",
    " <th>5</th>\n",
    " </tr>\n",
    " </thead>\n",
    " <tbody>\n",
    " <tr>\n",
    " <th>0</th>\n",
    " <td>RIVERS:</td>\n",
    " <td>$ 312.00</td>\n",
    " <td></td>\n",
    " <td></td>\n",
    " <td></td>\n",
    " <td></td>\n",
    " </tr>\n",
    " <tr>\n",
    " <th>1</th>\n",
    " <td>SMITA</td>\n",
    " <td></td>\n",
    " <td></td>\n",
    " <td>$ 140.00</td>\n",
    " <td>$1,170.00</td>\n",
    " <td></td>\n",
    " </tr>\n",
    " <tr>\n",
    " <th>2</th>\n",
    " <td>JONES</td>\n",
    " <td>$1,170.00</td>\n",
    " <td></td>\n",
    " <td></td>\n",
    " <td></td>\n",
    " <td></td>\n",
    " </tr>\n",
    " <tr>\n",
    " <th>3</th>\n",
    " <td>DOE</td>\n",
    " <td>$ 1,123.20</td>\n",
    " <td></td>\n",
    " <td></td>\n",
    " <td>$2,607.15,</td>\n",
    " <td></td>\n",
    " </tr>\n",
    " <tr>\n",
    " <th>4</th>\n",
    " <td>JUPONT</td>\n",
    " <td></td>\n",
    " <td>$ 280.80</td>\n",
    " <td>$ 524.40</td>\n",
    " <td></td>\n",
    " <td></td>\n",
    " </tr>\n",
    " <tr>\n",
    " <th>5</th>\n",
    " <td>HOUSE</td>\n",
    " <td>$ 62.40</td>\n",
    " <td></td>\n",
    " <td></td>\n",
    " <td></td>\n",
    " <td></td>\n",
    " </tr>\n",
    " <tr>\n",
    " <th>6</th>\n",
    " <td>PICCO</td>\n",
    " <td></td>\n",
    " <td>$1,560.00</td>\n",
    " <td>$ 936.00</td>\n",
    " <td></td>\n",
    " <td></td>\n",
    " </tr>\n",
    " <tr>\n",
    " <th>7</th>\n",
    " <td>RUTH</td>\n",
    " <td></td>\n",
    " <td>$ 5092.80</td>\n",
    " <td></td>\n",
    " <td></td>\n",
    " <td></td>\n",
    " </tr>\n",
    " <tr>\n",
    " <th>8</th>\n",
    " <td>DALY</td>\n",
    " <td></td>\n",
    " <td></td>\n",
    " <td></td>\n",
    " <td>$ 741.00</td>\n",
    " <td></td>\n",
    " </tr>\n",
    " <tr>\n",
    " <th>9</th>\n",
    " <td>WADE</td>\n",
    " <td></td>\n",
    " <td></td>\n",
    " <td>$3,900.00</td>\n",
    " <td>$ 7890.75</td>\n",
    " <td></td>\n",
    " </tr>\n",
    " <tr>\n",
    " <th>10</th>\n",
    " <td>SEVES</td>\n",
    " <td>$ 551.25</td>\n",
    " <td>$ 877.50</td>\n",
    " <td></td>\n",
    " <td></td>\n",
    " <td></td>\n",
    " </tr>\n",
    " <tr>\n",
    " <th>11</th>\n",
    " <td>WHYTTe”</td>\n",
    " <td>$ 147.00</td>\n",
    " <td></td>\n",
    " <td></td>\n",
    " <td>$ 780.00</td>\n",
    " <td></td>\n",
    " </tr>\n",
    " <tr>\n",
    " <th>12</th>\n",
    " <td></td>\n",
    " <td></td>\n",
    " <td></td>\n",
    " <td></td>\n",
    " <td></td>\n",
    " <td></td>\n",
    " </tr>\n",
    " </tbody>\n",
    "</table>\n",
    "</div>"
    ],
    "text/plain": [
    " 0 1 2 3 4 5\n",
    "0 RIVERS: $ 312.00 \n",
    "1 SMITA $ 140.00 $1,170.00 \n",
    "2 JONES $1,170.00 \n",
    "3 DOE $ 1,123.20 $2,607.15, \n",
    "4 JUPONT $ 280.80 $ 524.40 \n",
    "5 HOUSE $ 62.40 \n",
    "6 PICCO $1,560.00 $ 936.00 \n",
    "7 RUTH $ 5092.80 \n",
    "8 DALY $ 741.00 \n",
    "9 WADE $3,900.00 $ 7890.75 \n",
    "10 SEVES $ 551.25 $ 877.50 \n",
    "11 WHYTTe” $ 147.00 $ 780.00 \n",
    "12 "
    ]
    },
    "metadata": {},
    "output_type": "display_data"
    }
    ],
    "source": [
    "display(data)"
    ]
    }
    ],
    "metadata": {
  2. @hxy9243 hxy9243 revised this gist Mar 12, 2021. No changes.
  3. @hxy9243 hxy9243 revised this gist Mar 12, 2021. No changes.
  4. @hxy9243 hxy9243 renamed this gist Mar 12, 2021. 1 changed file with 0 additions and 0 deletions.
    File renamed without changes.
  5. @hxy9243 hxy9243 created this gist Mar 12, 2021.
    631 changes: 631 additions & 0 deletions Excel OCR example
    631 additions, 0 deletions not shown because the diff is too large. Please use a local Git client to view these changes.