Skip to content

Instantly share code, notes, and snippets.

@HAKSOAT
Created April 20, 2020 19:15
Show Gist options
  • Select an option

  • Save HAKSOAT/16ca8ae47830cd51605f7f7c4c272ac1 to your computer and use it in GitHub Desktop.

Select an option

Save HAKSOAT/16ca8ae47830cd51605f7f7c4c272ac1 to your computer and use it in GitHub Desktop.
{
"tokens": [
{
"token": "As",
"start_offset": 0,
"end_offset": 2,
"type": "word",
"position": 0
},
{
"token": " ",
"start_offset": 2,
"end_offset": 3,
"type": "word",
"position": 1
},
{
"token": "a",
"start_offset": 3,
"end_offset": 4,
"type": "word",
"position": 2
},
{
"token": " ",
"start_offset": 4,
"end_offset": 5,
"type": "word",
"position": 3
},
{
"token": "sentence",
"start_offset": 5,
"end_offset": 13,
"type": "word",
"position": 4
},
{
"token": ",",
"start_offset": 13,
"end_offset": 14,
"type": "word",
"position": 5
},
{
"token": " ",
"start_offset": 14,
"end_offset": 15,
"type": "word",
"position": 6
},
{
"token": "this",
"start_offset": 15,
"end_offset": 19,
"type": "word",
"position": 7
},
{
"token": " ",
"start_offset": 19,
"end_offset": 20,
"type": "word",
"position": 8
},
{
"token": "34",
"start_offset": 20,
"end_offset": 22,
"type": "word",
"position": 9
},
{
"token": " ",
"start_offset": 22,
"end_offset": 23,
"type": "word",
"position": 10
},
{
"token": "includes",
"start_offset": 23,
"end_offset": 31,
"type": "word",
"position": 11
},
{
"token": " ",
"start_offset": 31,
"end_offset": 32,
"type": "word",
"position": 12
},
{
"token": "punctuation",
"start_offset": 32,
"end_offset": 43,
"type": "word",
"position": 13
},
{
"token": ".",
"start_offset": 43,
"end_offset": 44,
"type": "word",
"position": 14
},
{
"token": " ",
"start_offset": 44,
"end_offset": 45,
"type": "word",
"position": 15
},
{
"token": "\n\n",
"start_offset": 45,
"end_offset": 47,
"type": "word",
"position": 16
},
{
"token": "==",
"start_offset": 47,
"end_offset": 49,
"type": "word",
"position": 17
},
{
"token": "Header",
"start_offset": 49,
"end_offset": 55,
"type": "word",
"position": 18
},
{
"token": "!",
"start_offset": 55,
"end_offset": 56,
"type": "word",
"position": 19
},
{
"token": "==",
"start_offset": 56,
"end_offset": 58,
"type": "word",
"position": 20
},
{
"token": "\n",
"start_offset": 58,
"end_offset": 59,
"type": "word",
"position": 21
},
{
"token": "मादरचोद",
"start_offset": 59,
"end_offset": 66,
"type": "word",
"position": 22
},
{
"token": " ",
"start_offset": 66,
"end_offset": 67,
"type": "word",
"position": 23
},
{
"token": "मादरचोद",
"start_offset": 67,
"end_offset": 74,
"type": "word",
"position": 24
},
{
"token": " ",
"start_offset": 74,
"end_offset": 75,
"type": "word",
"position": 25
},
{
"token": "مُن",
"start_offset": 75,
"end_offset": 78,
"type": "word",
"position": 26
},
{
"token": "ی",
"start_offset": 78,
"end_offset": 79,
"type": "word",
"position": 27
},
{
"token": "ر",
"start_offset": 79,
"end_offset": 80,
"type": "word",
"position": 28
},
{
"token": " ",
"start_offset": 80,
"end_offset": 81,
"type": "word",
"position": 29
},
{
"token": "克",
"start_offset": 81,
"end_offset": 82,
"type": "word",
"position": 30
},
{
"token": "·",
"start_offset": 82,
"end_offset": 83,
"type": "word",
"position": 31
},
{
"token": "科",
"start_offset": 83,
"end_offset": 84,
"type": "word",
"position": 32
},
{
"token": "伊",
"start_offset": 84,
"end_offset": 85,
"type": "word",
"position": 33
},
{
"token": "尔",
"start_offset": 85,
"end_offset": 86,
"type": "word",
"position": 34
},
{
"token": " ",
"start_offset": 86,
"end_offset": 87,
"type": "word",
"position": 35
},
{
"token": "し",
"start_offset": 87,
"end_offset": 88,
"type": "word",
"position": 36
},
{
"token": "〤",
"start_offset": 88,
"end_offset": 89,
"type": "word",
"position": 37
},
{
"token": "。",
"start_offset": 89,
"end_offset": 90,
"type": "word",
"position": 38
},
{
"token": "foobar",
"start_offset": 90,
"end_offset": 96,
"type": "word",
"position": 39
},
{
"token": "!",
"start_offset": 96,
"end_offset": 97,
"type": "word",
"position": 40
},
{
"token": "And",
"start_offset": 97,
"end_offset": 100,
"type": "word",
"position": 41
},
{
"token": " ",
"start_offset": 100,
"end_offset": 101,
"type": "word",
"position": 42
},
{
"token": "then",
"start_offset": 101,
"end_offset": 105,
"type": "word",
"position": 43
},
{
"token": " ",
"start_offset": 105,
"end_offset": 106,
"type": "word",
"position": 44
},
{
"token": "we",
"start_offset": 106,
"end_offset": 108,
"type": "word",
"position": 45
},
{
"token": " ",
"start_offset": 108,
"end_offset": 109,
"type": "word",
"position": 46
},
{
"token": "have",
"start_offset": 109,
"end_offset": 113,
"type": "word",
"position": 47
},
{
"token": " ",
"start_offset": 113,
"end_offset": 114,
"type": "word",
"position": 48
},
{
"token": "another",
"start_offset": 114,
"end_offset": 121,
"type": "word",
"position": 49
},
{
"token": " ",
"start_offset": 121,
"end_offset": 122,
"type": "word",
"position": 50
},
{
"token": "sentence",
"start_offset": 122,
"end_offset": 130,
"type": "word",
"position": 51
},
{
"token": " ",
"start_offset": 130,
"end_offset": 131,
"type": "word",
"position": 52
},
{
"token": "here",
"start_offset": 131,
"end_offset": 135,
"type": "word",
"position": 53
},
{
"token": "!",
"start_offset": 135,
"end_offset": 136,
"type": "word",
"position": 54
},
{
"token": "\n",
"start_offset": 136,
"end_offset": 137,
"type": "word",
"position": 55
},
{
"token": "[",
"start_offset": 137,
"end_offset": 138,
"type": "word",
"position": 56
},
{
"token": "//google.com",
"start_offset": 138,
"end_offset": 150,
"type": "word",
"position": 57
},
{
"token": " ",
"start_offset": 150,
"end_offset": 151,
"type": "word",
"position": 58
},
{
"token": "foo",
"start_offset": 151,
"end_offset": 154,
"type": "word",
"position": 59
},
{
"token": "]",
"start_offset": 154,
"end_offset": 155,
"type": "word",
"position": 60
},
{
"token": " ",
"start_offset": 155,
"end_offset": 156,
"type": "word",
"position": 61
},
{
"token": "https://website.gov?param=value",
"start_offset": 156,
"end_offset": 187,
"type": "word",
"position": 62
},
{
"token": "\n",
"start_offset": 187,
"end_offset": 188,
"type": "word",
"position": 63
},
{
"token": "peoples",
"start_offset": 188,
"end_offset": 195,
"type": "word",
"position": 64
},
{
"token": "\\",
"start_offset": 195,
"end_offset": 196,
"type": "word",
"position": 65
},
{
"token": "'",
"start_offset": 196,
"end_offset": 197,
"type": "word",
"position": 66
},
{
"token": " ",
"start_offset": 197,
"end_offset": 198,
"type": "word",
"position": 67
},
{
"token": "ain",
"start_offset": 198,
"end_offset": 201,
"type": "word",
"position": 68
},
{
"token": "\\",
"start_offset": 201,
"end_offset": 202,
"type": "word",
"position": 69
},
{
"token": "'",
"start_offset": 202,
"end_offset": 203,
"type": "word",
"position": 70
},
{
"token": "t",
"start_offset": 203,
"end_offset": 204,
"type": "word",
"position": 71
},
{
"token": " ",
"start_offset": 204,
"end_offset": 205,
"type": "word",
"position": 72
},
{
"token": "d’encyclop",
"start_offset": 205,
"end_offset": 215,
"type": "word",
"position": 73
},
{
"token": "é",
"start_offset": 215,
"end_offset": 216,
"type": "word",
"position": 74
},
{
"token": "die",
"start_offset": 216,
"end_offset": 219,
"type": "word",
"position": 75
},
{
"token": "\n",
"start_offset": 219,
"end_offset": 220,
"type": "word",
"position": 76
},
{
"token": "<",
"start_offset": 220,
"end_offset": 221,
"type": "word",
"position": 77
},
{
"token": "ref",
"start_offset": 221,
"end_offset": 224,
"type": "word",
"position": 78
},
{
"token": ">",
"start_offset": 224,
"end_offset": 225,
"type": "word",
"position": 79
},
{
"token": "derp",
"start_offset": 225,
"end_offset": 229,
"type": "word",
"position": 80
},
{
"token": "<",
"start_offset": 229,
"end_offset": 230,
"type": "word",
"position": 81
},
{
"token": "/",
"start_offset": 230,
"end_offset": 231,
"type": "word",
"position": 82
},
{
"token": "ref",
"start_offset": 231,
"end_offset": 234,
"type": "word",
"position": 83
},
{
"token": ">",
"start_offset": 234,
"end_offset": 235,
"type": "word",
"position": 84
},
{
"token": "<",
"start_offset": 235,
"end_offset": 236,
"type": "word",
"position": 85
},
{
"token": "ref",
"start_offset": 236,
"end_offset": 239,
"type": "word",
"position": 86
},
{
"token": " ",
"start_offset": 239,
"end_offset": 240,
"type": "word",
"position": 87
},
{
"token": "name",
"start_offset": 240,
"end_offset": 244,
"type": "word",
"position": 88
},
{
"token": "=",
"start_offset": 244,
"end_offset": 245,
"type": "word",
"position": 89
},
{
"token": "\"",
"start_offset": 245,
"end_offset": 246,
"type": "word",
"position": 90
},
{
"token": "foo",
"start_offset": 246,
"end_offset": 249,
"type": "word",
"position": 91
},
{
"token": "\"",
"start_offset": 249,
"end_offset": 250,
"type": "word",
"position": 92
},
{
"token": " ",
"start_offset": 250,
"end_offset": 251,
"type": "word",
"position": 93
},
{
"token": "/",
"start_offset": 251,
"end_offset": 252,
"type": "word",
"position": 94
},
{
"token": ">",
"start_offset": 252,
"end_offset": 253,
"type": "word",
"position": 95
},
{
"token": "[[",
"start_offset": 253,
"end_offset": 255,
"type": "word",
"position": 96
},
{
"token": "foo",
"start_offset": 255,
"end_offset": 258,
"type": "word",
"position": 97
},
{
"token": "|",
"start_offset": 258,
"end_offset": 259,
"type": "word",
"position": 98
},
{
"token": "bar",
"start_offset": 259,
"end_offset": 262,
"type": "word",
"position": 99
},
{
"token": "]]",
"start_offset": 262,
"end_offset": 264,
"type": "word",
"position": 100
},
{
"token": "mailto:[email protected]",
"start_offset": 264,
"end_offset": 287,
"type": "word",
"position": 101
},
{
"token": " ",
"start_offset": 287,
"end_offset": 288,
"type": "word",
"position": 102
},
{
"token": "위",
"start_offset": 288,
"end_offset": 289,
"type": "word",
"position": 103
},
{
"token": "키",
"start_offset": 289,
"end_offset": 290,
"type": "word",
"position": 104
},
{
"token": "백",
"start_offset": 290,
"end_offset": 291,
"type": "word",
"position": 105
},
{
"token": "과",
"start_offset": 291,
"end_offset": 292,
"type": "word",
"position": 106
},
{
"token": "의",
"start_offset": 292,
"end_offset": 293,
"type": "word",
"position": 107
},
{
"token": " ",
"start_offset": 293,
"end_offset": 294,
"type": "word",
"position": 108
},
{
"token": "운",
"start_offset": 294,
"end_offset": 295,
"type": "word",
"position": 109
},
{
"token": "영",
"start_offset": 295,
"end_offset": 296,
"type": "word",
"position": 110
},
{
"token": "은",
"start_offset": 296,
"end_offset": 297,
"type": "word",
"position": 111
},
{
"token": " ",
"start_offset": 297,
"end_offset": 298,
"type": "word",
"position": 112
},
{
"token": "비",
"start_offset": 298,
"end_offset": 299,
"type": "word",
"position": 113
},
{
"token": "영",
"start_offset": 299,
"end_offset": 300,
"type": "word",
"position": 114
},
{
"token": "리",
"start_offset": 300,
"end_offset": 301,
"type": "word",
"position": 115
},
{
"token": " ",
"start_offset": 301,
"end_offset": 302,
"type": "word",
"position": 116
},
{
"token": "단",
"start_offset": 302,
"end_offset": 303,
"type": "word",
"position": 117
},
{
"token": "체",
"start_offset": 303,
"end_offset": 304,
"type": "word",
"position": 118
},
{
"token": "인",
"start_offset": 304,
"end_offset": 305,
"type": "word",
"position": 119
},
{
"token": " ",
"start_offset": 305,
"end_offset": 306,
"type": "word",
"position": 120
},
{
"token": "위",
"start_offset": 306,
"end_offset": 307,
"type": "word",
"position": 121
},
{
"token": "키",
"start_offset": 307,
"end_offset": 308,
"type": "word",
"position": 122
},
{
"token": "미",
"start_offset": 308,
"end_offset": 309,
"type": "word",
"position": 123
},
{
"token": "디",
"start_offset": 309,
"end_offset": 310,
"type": "word",
"position": 124
},
{
"token": "어",
"start_offset": 310,
"end_offset": 311,
"type": "word",
"position": 125
},
{
"token": " ",
"start_offset": 311,
"end_offset": 312,
"type": "word",
"position": 126
},
{
"token": "재",
"start_offset": 312,
"end_offset": 313,
"type": "word",
"position": 127
},
{
"token": "단",
"start_offset": 313,
"end_offset": 314,
"type": "word",
"position": 128
},
{
"token": "이",
"start_offset": 314,
"end_offset": 315,
"type": "word",
"position": 129
},
{
"token": " ",
"start_offset": 315,
"end_offset": 316,
"type": "word",
"position": 130
},
{
"token": "দেখার",
"start_offset": 316,
"end_offset": 321,
"type": "word",
"position": 131
},
{
"token": " ",
"start_offset": 321,
"end_offset": 322,
"type": "word",
"position": 132
},
{
"token": "পর",
"start_offset": 322,
"end_offset": 324,
"type": "word",
"position": 133
},
{
"token": " ",
"start_offset": 324,
"end_offset": 325,
"type": "word",
"position": 134
},
{
"token": "তিনি",
"start_offset": 325,
"end_offset": 329,
"type": "word",
"position": 135
},
{
"token": " ",
"start_offset": 329,
"end_offset": 330,
"type": "word",
"position": 136
},
{
"token": "চ্চিত্র",
"start_offset": 330,
"end_offset": 337,
"type": "word",
"position": 137
},
{
"token": " ",
"start_offset": 337,
"end_offset": 338,
"type": "word",
"position": 138
},
{
"token": "\\",
"start_offset": 338,
"end_offset": 339,
"type": "word",
"position": 139
},
{
"token": "'",
"start_offset": 339,
"end_offset": 340,
"type": "word",
"position": 140
},
{
"token": "\\",
"start_offset": 340,
"end_offset": 341,
"type": "word",
"position": 141
},
{
"token": "'",
"start_offset": 341,
"end_offset": 342,
"type": "word",
"position": 142
},
{
"token": "\\",
"start_offset": 342,
"end_offset": 343,
"type": "word",
"position": 143
},
{
"token": "'",
"start_offset": 343,
"end_offset": 344,
"type": "word",
"position": 144
},
{
"token": "some",
"start_offset": 344,
"end_offset": 348,
"type": "word",
"position": 145
},
{
"token": " ",
"start_offset": 348,
"end_offset": 349,
"type": "word",
"position": 146
},
{
"token": "bold",
"start_offset": 349,
"end_offset": 353,
"type": "word",
"position": 147
},
{
"token": "\\",
"start_offset": 353,
"end_offset": 354,
"type": "word",
"position": 148
},
{
"token": "'",
"start_offset": 354,
"end_offset": 355,
"type": "word",
"position": 149
},
{
"token": "\\",
"start_offset": 355,
"end_offset": 356,
"type": "word",
"position": 150
},
{
"token": "'",
"start_offset": 356,
"end_offset": 357,
"type": "word",
"position": 151
},
{
"token": "\\",
"start_offset": 357,
"end_offset": 358,
"type": "word",
"position": 152
},
{
"token": "'",
"start_offset": 358,
"end_offset": 359,
"type": "word",
"position": 153
},
{
"token": " ",
"start_offset": 359,
"end_offset": 360,
"type": "word",
"position": 154
},
{
"token": "text",
"start_offset": 360,
"end_offset": 364,
"type": "word",
"position": 155
},
{
"token": " ",
"start_offset": 364,
"end_offset": 365,
"type": "word",
"position": 156
},
{
"token": "m80",
"start_offset": 365,
"end_offset": 368,
"type": "word",
"position": 157
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment