Last active
          May 11, 2024 15:17 
        
      - 
      
- 
        Save Zsailer/6da0dc3c97ec873685b7fe58e52d36d7 to your computer and use it in GitHub Desktop. 
Revisions
- 
        Zsailer revised this gist Aug 27, 2019 . 1 changed file with 88 additions and 27 deletions.There are no files selected for viewingThis file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -9,18 +9,19 @@ }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pydantic\n", "from pydantic import BaseModel, Schema\n", "from pydantic.main import MetaModel\n", "from schemaorg import main as schemaorg" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -78,7 +79,7 @@ }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -130,7 +131,7 @@ }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -178,9 +179,41 @@ }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'title': 'Thing',\n", " 'description': 'The most generic type of item.',\n", " 'type': 'object',\n", " 'properties': {'additionalType': {'title': 'additionalType',\n", " 'description': \"An additional type for the item, typically used for adding more specific types from external vocabularies in microdata syntax. This is a relationship between something and a class that the thing is in. In RDFa syntax, it is better to use the native RDFa syntax - the 'typeof' attribute - for multiple types. Schema.org tools may have only weaker understanding of extra types, in particular those defined externally.\",\n", " 'type': 'string'},\n", " 'alternateName': {'title': 'alternateName',\n", " 'description': 'An alias for the item.',\n", " 'type': 'string'},\n", " 'description': {'title': 'Description',\n", " 'description': 'A description of the item.',\n", " 'type': 'string'},\n", " 'disambiguatingDescription': {'title': 'Disambiguatingdescription',\n", " 'description': 'A sub property of description. A short description of the item used to disambiguate from other, similar items. Information from other properties (in particular, name) may be necessary for the description to be useful for disambiguation.',\n", " 'type': 'string'}},\n", " 'required': ['additionalType',\n", " 'alternateName',\n", " 'description',\n", " 'disambiguatingDescription'],\n", " '$id': 'https://schema.org/Thing',\n", " '$schema': 'https://schema.org',\n", " 'version': 3.9}" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "Thing.schema()" ] @@ -201,11 +234,22 @@ }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Object did not validate.\n" ] } ], "source": [ "try: \n", " thing = Thing()\n", "except pydantic.ValidationError:\n", " print(\"Object did not validate.\")" ] }, { @@ -217,7 +261,7 @@ }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -245,7 +289,7 @@ }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -283,38 +327,55 @@ }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Specification base set to http://www.schema.org\n", "Using Version 3.5\n", "Found http://www.schema.org/Thing\n", "Thing: found 12 properties\n", "Specification base set to http://www.schema.org\n", "Using Version 3.5\n", "Found http://www.schema.org/Event\n", "Event: found 47 properties\n" ] } ], "source": [ "class Thing(metaclass=SchemaOrg): pass\n", "class Event(metaclass=SchemaOrg): pass" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Specification base set to http://www.schema.org\n", "Using Version 3.5\n", "Found http://www.schema.org/Person\n", "Person: found 69 properties\n" ] } ], "source": [ "class Person(metaclass=SchemaOrg): pass" ] } ], "metadata": { "kernelspec": { "display_name": "omnipotent (Python 3.7)", "language": "python", "name": "omnipotent" }, "language_info": { "codemirror_mode": { 
- 
        Zsailer revised this gist Aug 27, 2019 . 1 changed file with 6 additions and 22 deletions.There are no files selected for viewingThis file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -13,7 +13,9 @@ "metadata": {}, "outputs": [], "source": [ "from pydantic import BaseModel, Schema\n", "from pydantic.main import MetaModel\n", "from schemaorg import main as schemaorg" ] }, { @@ -80,8 +82,6 @@ "metadata": {}, "outputs": [], "source": [ "\n", "class MetaSchema(MetaModel):\n", " \"\"\"Metaclass that checks for three required class attributes:\n", @@ -243,15 +243,6 @@ "## Autogenerate pydantic objects from schema.org" ] }, { "cell_type": "code", "execution_count": null, @@ -317,20 +308,13 @@ "source": [ "Event.schema()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3.7", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { @@ -347,4 +331,4 @@ }, "nbformat": 4, "nbformat_minor": 4 } 
- 
        Zsailer revised this gist Aug 27, 2019 . 1 changed file with 3 additions and 3 deletions.There are no files selected for viewingThis file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -328,9 +328,9 @@ ], "metadata": { "kernelspec": { "display_name": "omnipotent (Python 3.7)", "language": "python", "name": "omnipotent" }, "language_info": { "codemirror_mode": { @@ -347,4 +347,4 @@ }, "nbformat": 4, "nbformat_minor": 4 } 
- 
        Zsailer revised this gist Aug 27, 2019 . No changes.There are no files selected for viewing
- 
        Zsailer revised this gist Aug 27, 2019 . 1 changed file with 36 additions and 39 deletions.There are no files selected for viewingThis file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -240,7 +240,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ "## Autogenerate pydantic objects from schema.org" ] }, { @@ -258,38 +258,36 @@ "metadata": {}, "outputs": [], "source": [ "class SchemaOrg(MetaSchema):\n", " \n", " def __new__(cls, name, base, dct):\n", " annotations = {}\n", " \n", " data = schemaorg.Schema(name)\n", " \n", " dct = dict(\n", " _title=name,\n", " _id=data.id,\n", " _version=float(data.version),\n", " _schema=data.base,\n", " __doc__=data.comment,\n", " __annotations__={}\n", " )\n", "\n", " # Currently, sets all class variables to type==str for\n", " # demostration purposes.\n", " # Need to develop datatypes for Schema.org objects.\n", " for key, val in data._properties.items():\n", " dct[key] = Schema(\n", " ...,\n", " description=val['comment'],\n", " title=val['label']\n", " )\n", " dct['__annotations__'][key] = str\n", "\n", " base = (BaseModel,) + base\n", " \n", " return super(SchemaOrg, cls).__new__(cls, name, base, dct)\n" ] }, { @@ -298,7 +296,8 @@ "metadata": {}, "outputs": [], "source": [ "class Thing(metaclass=SchemaOrg): pass\n", "class Event(metaclass=SchemaOrg): pass" ] }, { @@ -307,7 +306,7 @@ "metadata": {}, "outputs": [], "source": [ "Thing.schema()" ] }, { @@ -316,24 +315,22 @@ "metadata": {}, "outputs": [], "source": [ "Event.schema()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3.7", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { 
- 
        Zsailer created this gist Aug 27, 2019 .There are no files selected for viewingThis file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,353 @@ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Define and validate schema.org structured data in Python with Pydantic " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from pydantic import BaseModel, Schema" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "class Thing(BaseModel):\n", " \"\"\"The most generic type of item.\"\"\"\n", " \n", " # Need to define extra items at the top level \n", " class Config:\n", " title = 'Thing'\n", " schema_extra = {\n", " '$schema': 'https://schema.org',\n", " '$id': 'https://schema.org/Thing',\n", " }\n", " \n", " additionalType: str = Schema(\n", " ...,\n", " title='additionalType',\n", " description=(\n", " \"An additional type for the item, typically \"\n", " \"used for adding more specific types from \"\n", " \"external vocabularies in microdata syntax. \"\n", " \"This is a relationship between something and \"\n", " \"a class that the thing is in. In RDFa syntax, \"\n", " \"it is better to use the native RDFa syntax - \"\n", " \"the 'typeof' attribute - for multiple types. \"\n", " \"Schema.org tools may have only weaker \"\n", " \"understanding of extra types, in particular \"\n", " \"those defined externally.\"\n", " )\n", " )\n", " \n", " alternateName: str = Schema(\n", " ...,\n", " title='alternateName',\n", " description=\"An alias for the item.\"\n", " )\n", " \n", " description: str = Schema(\n", " ...,\n", " description=\"A description of the item.\"\n", " )\n", " \n", " disambiguatingDescription: str = Schema(\n", " ...,\n", " description=\"A sub property of description. A short description of the item used to disambiguate from other, similar items. Information from other properties (in particular, name) may be necessary for the description to be useful for disambiguation.\"\n", " )" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "I want to enforce the attributes in the `Config` accessor, *and* I think the accessor syntax is a bit ugly. Let's see if we can make it go away." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from pydantic.main import MetaModel\n", "\n", "\n", "class MetaSchema(MetaModel):\n", " \"\"\"Metaclass that checks for three required class attributes:\n", " 1. _id: the ID of the event\n", " 2. _version: the version of the current schema.\n", " 3. _title: the name of the schema.\n", "\n", " These attribute are mapped to pydantic.BaseModel's `Config` inner class\n", " for proper schema generation+validation.\n", " \"\"\"\n", " def __new__(cls, name, base, dct):\n", " # Check that required keys are found.\n", " if not all((key in dct for key in ['_id', '_title', '_version', '_schema'])):\n", " raise AttributeError('Required class attributes are missing from the {} class.'.format(name))\n", "\n", " # Check that keys are the proper types.\n", " if not all((\n", " type(dct['_id']) in (str, type(None)),\n", " type(dct['_version']) in (float, type(None)),\n", " type(dct['_title']) in (str, type(None)),\n", " type(dct['_schema']) in (str, type(None)),\n", " )):\n", " raise TypeError('Check the class attributes types: \"_id\" must be a string, '\n", " '\"_version\" must be an integer, and \"_title\" must be a string.')\n", "\n", " # Add a Config inner class to this Pydantic model.\n", " class Config:\n", " title = dct['_title']\n", " schema_extra = {\n", " '$id': dct['_id'],\n", " '$schema': dct['_schema'],\n", " 'version': dct['_version']\n", " }\n", "\n", " dct['Config'] = Config\n", " return super(MetaSchema, cls).__new__(cls, name, base, dct)\n", "\n", "\n", "class JsonSchema(pydantic.BaseModel, metaclass=MetaSchema):\n", " \"\"\"A pydantic base Model for JSON schemas.\"\"\"\n", " _id: str = None\n", " _version: float = None\n", " _title: str = None\n", " _schema: str = None" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "class Thing(JsonSchema):\n", " \"\"\"The most generic type of item.\"\"\"\n", " # Define top level attributes\n", " _id = 'https://schema.org/Thing'\n", " _version = 3.9\n", " _title = 'Thing'\n", " _schema = 'https://schema.org'\n", "\n", " additionalType: str = Schema(\n", " ...,\n", " title='additionalType',\n", " description=(\n", " \"An additional type for the item, typically \"\n", " \"used for adding more specific types from \"\n", " \"external vocabularies in microdata syntax. \"\n", " \"This is a relationship between something and \"\n", " \"a class that the thing is in. In RDFa syntax, \"\n", " \"it is better to use the native RDFa syntax - \"\n", " \"the 'typeof' attribute - for multiple types. \"\n", " \"Schema.org tools may have only weaker \"\n", " \"understanding of extra types, in particular \"\n", " \"those defined externally.\"\n", " )\n", " )\n", " \n", " alternateName: str = Schema(\n", " ...,\n", " title='alternateName',\n", " description=\"An alias for the item.\"\n", " )\n", " \n", " description: str = Schema(\n", " ...,\n", " description=\"A description of the item.\"\n", " )\n", " \n", " disambiguatingDescription: str = Schema(\n", " ...,\n", " description=\"A sub property of description. A short description of the item used to disambiguate from other, similar items. Information from other properties (in particular, name) may be necessary for the description to be useful for disambiguation.\"\n", " )" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "Thing.schema()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Validate a new object" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "What happens when we create an invalid object?" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "thing = Thing()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now let's try a valid object..." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "thing = Thing(\n", " alternateName='New Thing',\n", " description='This is a new thing',\n", " disambiguatingDescription='This thing is unique.',\n", " additionalType='No additional type'\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "No error was raised." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Attempting to autogenerate pydantic objects from schema.org" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from schemaorg import main as schemaorg" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "obj._properties" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "obj = schemaorg.Schema(\"Thing\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def schematize(obj_name):\n", " # Upload schemaorg object.\n", " obj = schemaorg.Schema(obj_name)\n", " dct = dict(\n", " _title=obj_name,\n", " __doc__=obj.comment,\n", " _id=obj.id,\n", " _version=obj.version,\n", " )\n", " for key, val in obj.items():\n", " dct[key] = val\n", " \n", "\n", " " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "x = type('Thing', (MetaSchema,), {})" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "x.__name__" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "obj._properties.keys()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "Thing.schema()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.1" } }, "nbformat": 4, "nbformat_minor": 4 }