Skip to content

Instantly share code, notes, and snippets.

@LukDeveloper
Forked from andre-carvalho/download-deter-data.py
Created November 20, 2021 20:10
Show Gist options
  • Save LukDeveloper/25d543451d3b303cd1d8b1010ae7fe73 to your computer and use it in GitHub Desktop.
Save LukDeveloper/25d543451d3b303cd1d8b1010ae7fe73 to your computer and use it in GitHub Desktop.

Revisions

  1. @andre-carvalho andre-carvalho revised this gist Sep 14, 2021. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion download-deter-data.py
    Original file line number Diff line number Diff line change
    @@ -110,7 +110,7 @@ def __buildQueryString(self, OUTPUTFORMAT=None):
    https://www.ogc.org/standards/wfs
    """
    # Filters example (by date interval and uf)
    CQL_FILTER="date BETWEEN '{0}' AND '{1}'".format(self.START_DATE,self.END_DATE)
    CQL_FILTER="view_date BETWEEN '{0}' AND '{1}'".format(self.START_DATE,self.END_DATE)
    if self.UF: CQL_FILTER="{0} AND uf='{1}'".format(CQL_FILTER,self.UF)
    # WFS parameters
    SERVICE="WFS"
  2. @andre-carvalho andre-carvalho revised this gist Aug 12, 2020. 1 changed file with 116 additions and 16 deletions.
    132 changes: 116 additions & 16 deletions download-deter-data.py
    Original file line number Diff line number Diff line change
    @@ -22,9 +22,10 @@
    http://terrabrasilis.dpi.inpe.br/categoria/conteudo-tecnico/
    """

    import requests, os
    import requests, os, io
    from requests.auth import HTTPBasicAuth
    from datetime import datetime
    from xml.etree import ElementTree as xmlTree

    class DownloadWFS:
    """
    @@ -49,45 +50,74 @@ def __init__(self,user=None,password=None):
    The next filter on the states, uses the UF parameter and the
    values ​​are the acronyms of the state names, like PA or AM.
    Important note: If the range used to filter is very wide, the number
    of resources returned may be greater than the maximum limit allowed by the server.
    Important note: If the range used to filter is very wide, the number of resources
    returned may be greater than the maximum limit allowed by the server.
    In this case, this version has been prepared for pagination and the shapefile is
    downloaded in parts.
    """

    # warning: before change the time interval, pay attention into notes on constructor.
    self.START_DATE="2020-01-01"
    self.START_DATE="2018-01-01"
    self.END_DATE=datetime.today().strftime('%Y-%m-%d')
    self.UF="PA"
    self.UF=None

    self.WORKSPACE_NAME="deter-amz"
    # To change the desired layer, change the following value.
    # The public layer "deter_amz" or the controled layer "deter_amz_auth"
    self.LAYER_NAME="deter_amz"

    # The output file name (layer_name_start_date_end_date_uf)
    self.OUTPUT_FILENAME="{0}_{1}_{2}_{3}".format(self.LAYER_NAME,self.START_DATE,self.END_DATE, self.UF)
    # The output file name (layer_name_start_date_end_date)
    self.OUTPUT_FILENAME="{0}_{1}_{2}".format(self.LAYER_NAME,self.START_DATE,self.END_DATE)
    self.AUTH=None

    if user and password:
    self.AUTH=HTTPBasicAuth(user, password)

    self.URL_BASE="terrabrasilis.dpi.inpe.br"

    def __buildQueryString(self):
    def __buildBaseURL(self):
    host="terrabrasilis.dpi.inpe.br"
    url="http://{0}/geoserver/{1}/{2}/wfs".format(host,self.WORKSPACE_NAME,self.LAYER_NAME)
    return url

    def __buildQueryString(self, OUTPUTFORMAT=None):
    """
    Building the query string to call the WFS service.
    The parameter: OUTPUTFORMAT, the output format for the WFS GetFeature operation described
    in the AllowedValues ​​section in the capabilities document.
    <ows:Parameter name="outputFormat">
    <ows:AllowedValues>
    <ows:Value>application/gml+xml; version=3.2</ows:Value>
    <ows:Value>GML2</ows:Value>
    <ows:Value>KML</ows:Value>
    <ows:Value>SHAPE-ZIP</ows:Value>
    <ows:Value>application/json</ows:Value>
    <ows:Value>application/vnd.google-earth.kml xml</ows:Value>
    <ows:Value>application/vnd.google-earth.kml+xml</ows:Value>
    <ows:Value>csv</ows:Value>
    <ows:Value>gml3</ows:Value>
    <ows:Value>gml32</ows:Value>
    <ows:Value>json</ows:Value>
    <ows:Value>text/xml; subtype=gml/2.1.2</ows:Value>
    <ows:Value>text/xml; subtype=gml/3.1.1</ows:Value>
    <ows:Value>text/xml; subtype=gml/3.2</ows:Value>
    </ows:AllowedValues>
    </ows:Parameter>
    To change defined filters and discover more possibilities
    you should learn more about WFS standard and how to filter using CQL.
    https://www.ogc.org/standards/wfs
    """
    # Filters example (by date interval and uf)
    CQL_FILTER="date BETWEEN '{0}' AND '{1}' AND uf='{2}'".format(self.START_DATE,self.END_DATE,self.UF)
    CQL_FILTER="date BETWEEN '{0}' AND '{1}'".format(self.START_DATE,self.END_DATE)
    if self.UF: CQL_FILTER="{0} AND uf='{1}'".format(CQL_FILTER,self.UF)
    # WFS parameters
    SERVICE="WFS"
    REQUEST="GetFeature"
    VERSION="2.0.0"
    # if OUTPUTFORMAT is changed, check the output file extension within the get method in this class.
    OUTPUTFORMAT="SHAPE-ZIP"
    OUTPUTFORMAT= ("SHAPE-ZIP" if not OUTPUTFORMAT else OUTPUTFORMAT)
    exceptions="text/xml"
    # define the output projection. We use the layer default projection. (Geography/SIRGAS2000)
    srsName="EPSG:4674"
    @@ -98,12 +128,77 @@ def __buildQueryString(self):
    allLocalParams.pop("self",None)
    PARAMS="&".join("{}={}".format(k,v) for k,v in allLocalParams.items())
    return PARAMS

    def get(self):
    url="http://{0}/geoserver/ows?{1}".format(self.URL_BASE, self.__buildQueryString())

    def __xmlRequest(self, url):
    root=None
    if self.AUTH:
    response=requests.get(url, auth=self.AUTH)
    else:
    response=requests.get(url)

    if response.ok:
    xmlInMemory = io.BytesIO(response.content)
    tree = xmlTree.parse(xmlInMemory)
    root = tree.getroot()

    return root

    def __getServerLimit(self):
    """
    Read the data download service limit via WFS
    """
    url="{0}?{1}".format(self.__buildBaseURL(),"service=wfs&version=2.0.0&request=GetCapabilities")
    # the default limit on our GeoServer
    serverLimit=100000

    XML=self.__xmlRequest(url)

    if '{http://www.opengis.net/wfs/2.0}WFS_Capabilities'==XML.tag:
    for p in XML.findall(".//{http://www.opengis.net/ows/1.1}Operation/[@name='GetFeature']"):
    dv=p.find(".//{http://www.opengis.net/ows/1.1}Constraint/[@name='CountDefault']")
    serverLimit=dv.find('.//{http://www.opengis.net/ows/1.1}DefaultValue').text

    return int(serverLimit)

    def __countMaxResult(self):
    """
    Read the number of lines of results expected in the download using the defined filters.
    """
    url="{0}?{1}".format(self.__buildBaseURL(), self.__buildQueryString())
    url="{0}&{1}".format(url,"resultType=hits")
    numberMatched=0

    XML=self.__xmlRequest(url)
    if '{http://www.opengis.net/wfs/2.0}FeatureCollection'==XML.tag:
    numberMatched=XML.find('[@numberMatched]').get('numberMatched')

    return int(numberMatched)

    def __pagination(self):
    # get server limit and count max number of results
    sl=self.__getServerLimit()
    rr=self.__countMaxResult()
    # define the start page number
    pagNumber=1
    # define the start index of data
    startIndex=0
    # define the attribute to sort data
    sortBy="gid"
    # using the server limit to each download
    count=sl
    # pagination iteraction
    while(startIndex<rr):
    paginationParams="&count={0}&sortBy={1}&startIndex={2}".format(count,sortBy,startIndex)
    self.__download(paginationParams,pagNumber)
    startIndex=startIndex+count
    pagNumber=pagNumber+1

    def __download(self, pagination="startIndex=0", pagNumber=1):
    url="{0}?{1}&{2}".format(self.__buildBaseURL(), self.__buildQueryString(), pagination)

    dir_name=os.path.realpath(os.path.dirname(__file__))
    # the extension of output file is ".zip" because the OUTPUTFORMAT is defined as "SHAPE-ZIP"
    output_file="{0}/{1}.zip".format(dir_name,self.OUTPUT_FILENAME)
    output_file="{0}/{1}_part{2}.zip".format(dir_name, self.OUTPUT_FILENAME, pagNumber)
    if self.AUTH:
    response=requests.get(url, auth=self.AUTH)
    else:
    @@ -114,11 +209,16 @@ def get(self):
    else:
    print("Download fail with HTTP Error: {0}".format(response.status_code))

    def get(self):
    self.__pagination()

    # end of class

    # To call without credentials (uses the public layer name)
    down=DownloadWFS()

    # To call with credentials (needs change the layer name)
    #down=DownloadWFS("user_name","password")
    # down=DownloadWFS("user","pass")

    # Call download
    down.get()
  3. @andre-carvalho andre-carvalho revised this gist Jun 1, 2020. 1 changed file with 21 additions and 8 deletions.
    29 changes: 21 additions & 8 deletions download-deter-data.py
    Original file line number Diff line number Diff line change
    @@ -2,7 +2,7 @@
    Download WFS
    Download Shapefile DETER
    Copyright 2020 Andre Carvalho
    Copyright 2020 TerraBrasilis
    Usage:
    download-deter-data.py
    @@ -38,7 +38,7 @@ class DownloadWFS:
    parameter values ​​in accordance with the respective notes.
    """

    def __init__(self,user="user",password="pass"):
    def __init__(self,user=None,password=None):
    """
    Constructor with predefined settings.
    @@ -58,14 +58,18 @@ def __init__(self,user="user",password="pass"):
    self.END_DATE=datetime.today().strftime('%Y-%m-%d')
    self.UF="PA"

    # To change the desired layer, change these two values ​​below.
    self.WORKSPACE_NAME="deter-amz"
    self.LAYER_NAME="deter_daily_auth"
    # To change the desired layer, change the following value.
    # The public layer "deter_amz" or the controled layer "deter_amz_auth"
    self.LAYER_NAME="deter_amz"

    # The output file name (layer_name_start_date_end_date_uf)
    self.OUTPUT_FILENAME="{0}_{1}_{2}_{3}".format(self.LAYER_NAME,self.START_DATE,self.END_DATE, self.UF)

    self.AUTH=HTTPBasicAuth(user, password)
    self.AUTH=None

    if user and password:
    self.AUTH=HTTPBasicAuth(user, password)

    self.URL_BASE="terrabrasilis.dpi.inpe.br"

    def __buildQueryString(self):
    @@ -100,12 +104,21 @@ def get(self):
    dir_name=os.path.realpath(os.path.dirname(__file__))
    # the extension of output file is ".zip" because the OUTPUTFORMAT is defined as "SHAPE-ZIP"
    output_file="{0}/{1}.zip".format(dir_name,self.OUTPUT_FILENAME)
    response=requests.get(url, auth=self.AUTH)
    if self.AUTH:
    response=requests.get(url, auth=self.AUTH)
    else:
    response=requests.get(url)
    if response.ok:
    with open(output_file, 'wb') as f:
    f.write(response.content)
    else:
    print("Download fail with HTTP Error: {0}".format(response.status_code))

    down=DownloadWFS("user_name","password")
    # To call without credentials (uses the public layer name)
    down=DownloadWFS()

    # To call with credentials (needs change the layer name)
    #down=DownloadWFS("user_name","password")

    # Call download
    down.get()
  4. @andre-carvalho andre-carvalho revised this gist Jun 1, 2020. No changes.
  5. @andre-carvalho andre-carvalho revised this gist Jun 1, 2020. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion download-deter-data.py
    Original file line number Diff line number Diff line change
    @@ -2,7 +2,7 @@
    Download WFS
    Download Shapefile DETER
    Copyright 2020 TerraBrasilis
    Copyright 2020 Andre Carvalho
    Usage:
    download-deter-data.py
  6. @andre-carvalho andre-carvalho created this gist Jun 1, 2020.
    111 changes: 111 additions & 0 deletions download-deter-data.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,111 @@
    """
    Download WFS
    Download Shapefile DETER
    Copyright 2020 TerraBrasilis
    Usage:
    download-deter-data.py
    Options:
    no have
    Disclaimer.
    This is an example, therefore, we do not implement all error handling or cover all
    problems related to the download process.
    It is highly recommended that you improve and adapt this code to your specifics.
    If you run this script in the same directory on the same day, the output file will be replaced.
    Before start, read the tecnical posts into our Blog.
    http://terrabrasilis.dpi.inpe.br/categoria/conteudo-tecnico/
    """

    import requests, os
    from requests.auth import HTTPBasicAuth
    from datetime import datetime

    class DownloadWFS:
    """
    Define configurations on instantiate.
    First parameter: user, The user name used to authentication on the server.
    Second parameter: password, The password value used to authentication on the server.
    To change the predefined settings, inside the constructor, edit the
    parameter values ​​in accordance with the respective notes.
    """

    def __init__(self,user="user",password="pass"):
    """
    Constructor with predefined settings.
    The start date is set by manually changing the value of the START_DATE parameter, below.
    The end date is automatically detected in the machine's calendar.
    The next filter on the states, uses the UF parameter and the
    values ​​are the acronyms of the state names, like PA or AM.
    Important note: If the range used to filter is very wide, the number
    of resources returned may be greater than the maximum limit allowed by the server.
    """

    # warning: before change the time interval, pay attention into notes on constructor.
    self.START_DATE="2020-01-01"
    self.END_DATE=datetime.today().strftime('%Y-%m-%d')
    self.UF="PA"

    # To change the desired layer, change these two values ​​below.
    self.WORKSPACE_NAME="deter-amz"
    self.LAYER_NAME="deter_daily_auth"

    # The output file name (layer_name_start_date_end_date_uf)
    self.OUTPUT_FILENAME="{0}_{1}_{2}_{3}".format(self.LAYER_NAME,self.START_DATE,self.END_DATE, self.UF)

    self.AUTH=HTTPBasicAuth(user, password)
    self.URL_BASE="terrabrasilis.dpi.inpe.br"

    def __buildQueryString(self):
    """
    Building the query string to call the WFS service.
    To change defined filters and discover more possibilities
    you should learn more about WFS standard and how to filter using CQL.
    https://www.ogc.org/standards/wfs
    """
    # Filters example (by date interval and uf)
    CQL_FILTER="date BETWEEN '{0}' AND '{1}' AND uf='{2}'".format(self.START_DATE,self.END_DATE,self.UF)
    # WFS parameters
    SERVICE="WFS"
    REQUEST="GetFeature"
    VERSION="2.0.0"
    # if OUTPUTFORMAT is changed, check the output file extension within the get method in this class.
    OUTPUTFORMAT="SHAPE-ZIP"
    exceptions="text/xml"
    # define the output projection. We use the layer default projection. (Geography/SIRGAS2000)
    srsName="EPSG:4674"
    # the layer definition
    TYPENAME="{0}:{1}".format(self.WORKSPACE_NAME,self.LAYER_NAME)

    allLocalParams=locals()
    allLocalParams.pop("self",None)
    PARAMS="&".join("{}={}".format(k,v) for k,v in allLocalParams.items())
    return PARAMS

    def get(self):
    url="http://{0}/geoserver/ows?{1}".format(self.URL_BASE, self.__buildQueryString())
    dir_name=os.path.realpath(os.path.dirname(__file__))
    # the extension of output file is ".zip" because the OUTPUTFORMAT is defined as "SHAPE-ZIP"
    output_file="{0}/{1}.zip".format(dir_name,self.OUTPUT_FILENAME)
    response=requests.get(url, auth=self.AUTH)
    if response.ok:
    with open(output_file, 'wb') as f:
    f.write(response.content)
    else:
    print("Download fail with HTTP Error: {0}".format(response.status_code))

    down=DownloadWFS("user_name","password")
    down.get()