Skip to content

Instantly share code, notes, and snippets.

@fjcerignoni
Created May 10, 2021 14:33
Show Gist options
  • Save fjcerignoni/8c53cb28a969eae8f2023c0b06b37552 to your computer and use it in GitHub Desktop.
Save fjcerignoni/8c53cb28a969eae8f2023c0b06b37552 to your computer and use it in GitHub Desktop.
--=====================================================================================
-- CLEANING THE CAR
--=====================================================================================
-- Author: Felipe Cerignoni
-- Institution: Imaflora
-- Last update: 10/05/2021
--=====================================================================================
---------------------------------------------------------------------------------------
-- MATERIALIZED VIEW TO IDENTIFY ALL OVERLAPS------------------------------------------
CREATE MATERIALIZED VIEW car.pa_br_20210412_areaimovel_ovlps
AS WITH car AS (
SELECT * FROM car.pa_br_20210412_areaimovel_original
WHERE NOT cod_imovel IN ( SELECT cod_imovel
FROM car.pa_br_20210412_poligonosexcluidos)
)
SELECT a.fid as gid,
a.cod_imovel,
a.nu_modulo_fiscal,
a.tp_imovel,
a.geom,
count(b.fid) AS ovlp_count,
array_agg(b.fid) AS ovlp_features,
array_agg(b.geom) AS ovlp_geoms
FROM car a
LEFT JOIN car.pa_br_20210412_areaimovel_original b
ON a.fid <> b.fid
AND st_intersects(a.geom, b.geom)
AND NOT st_touches(a.geom, b.geom)
GROUP BY a.fid, a.cod_imovel, a.nu_modulo_fiscal, a.tp_imovel, a.geom;
--ALTER TABLE car.pa_br_20210412_areaimovel_ovlps RENAME COLUMN fid TO gid;
-- TODO mudar a coluna fid para gid no script de download do CAR
SELECT count(*) FROM car.pa_br_20210412_areaimovel_ovlps; -- 6.068.353
---------------------------------------------------------------------------------------
-- TABLE TO SELECT ALL CARS THAT DO NOT OVERLAP-----------------------------------------
-- DROP TABLE car.malhacar_2021_proc0_noovlp;
CREATE TABLE car.malhacar_2021_proc0_noovlp AS (
SELECT gid, cod_imovel, tp_imovel, nu_modulo_fiscal, true::bool as premium, geom
FROM car.pa_br_20210412_areaimovel_ovlps
WHERE ovlp_count = 0
); -- 912.225
---------------------------------------------------------------------------------------
-- TABLE TO SELECT ALL CARS THAT HAVE OVERLAPS-----------------------------------------
CREATE TABLE car.malhacar_2021_proc1 AS (
SELECT gid, cod_imovel, tp_imovel, nu_modulo_fiscal, ovlp_count, geom, ovlp_features, ovlp_geoms
FROM car.pa_br_20210412_areaimovel_ovlps
WHERE ovlp_count <> 0
); -- 5.156.128
ALTER TABLE car.malhacar_2021_proc1 ADD PRIMARY KEY (gid);
CREATE INDEX malhacar_2021_proc1_geom_idx ON car.malhacar_2021_proc1 USING gist(geom);
---------------------------------------------------------------------------------------
-- TABLE TO DEFINE IF CAR IS PREMIUM OR POOR ------------------------------------------
DROP TABLE car.malhacar_2021_proc2 CASCADE;
CREATE TABLE car.malhacar_2021_proc2 (
gid int,
cod_imovel text,
tp_imovel text,
nu_modulo_fiscal numeric,
ovlp_count int,
area_orig numeric,
area_final numeric,
ovlp_ratio numeric,
premium bool,
geom geometry,
geom_diff geometry,
ovlp_features int[]
);
-- PARALELO:
--SELECT :var_proc proc;
--INSERT INTO car.malhacar_2021_proc2 (gid, cod_imovel, tp_imovel, nu_modulo_fiscal, ovlp_count, area_orig, area_final, ovlp_ratio, premium, geom, geom_diff, ovlp_features)
--WITH difference AS (
-- SELECT a.gid, a.cod_imovel, a.tp_imovel, a.nu_modulo_fiscal, a.ovlp_count, a.geom,
-- st_difference(a.geom, st_union(d_geom)) as b_geom, ovlp_features
-- FROM car.malhacar_2021_proc1 a,
-- LATERAL (SELECT (st_dump(geom)).geom as d_geom FROM unnest(a.ovlp_geoms) as geom) foo
-- WHERE (a.gid % 40) = :var_proc
-- GROUP BY a.gid, a.cod_imovel, a.nu_modulo_fiscal, a.tp_imovel, a.ovlp_count, a.geom, ovlp_features
--
-- ),
-- areas AS (
-- SELECT gid, cod_imovel, tp_imovel, nu_modulo_fiscal, ovlp_count,
-- st_area(st_transform(geom, 97823))/10000 as area_orig,
-- st_area(st_transform(b_geom, 97823))/10000 as area_final,
-- geom, b_geom, ovlp_features
-- FROM difference
-- ),
-- ratio AS (
-- SELECT gid, cod_imovel, tp_imovel, nu_modulo_fiscal, ovlp_count, area_orig, area_final,
-- area_final/area_orig as area_orig_ratio, geom, b_geom, ovlp_features
-- FROM areas)
--
--SELECT gid, cod_imovel, tp_imovel, nu_modulo_fiscal, ovlp_count,
-- area_orig, area_final, area_orig_ratio,
-- CASE
-- WHEN nu_modulo_fiscal <= 4 AND ovlp_ratio >= 0.9 AND ovlp_count <= 3 THEN true
-- WHEN nu_modulo_fiscal <= 15 AND ovlp_ratio >= 0.97 AND ovlp_count <= 5 THEN true
-- WHEN nu_modulo_fiscal > 15 AND ovlp_ratio >= 0.95 AND ovlp_count <= 7 THEN true
-- ELSE false
-- END AS premium,
-- geom, b_geom, ovlp_features
--FROM ratio;
ALTER TABLE car.malhacar_2021_proc2 ADD PRIMARY KEY(gid);
SELECT count(distinct gid) FROM car.malhacar_2021_proc2; --5.156.128
---------------------------------------------------------------------------------------
-- TABLE TO COMPARE ALL OVELAPS ONE BY ONE --------------------------------------------
DROP TABLE car.malhacar_2021_proc3;
CREATE TABLE car.malhacar_2021_proc3 AS (
SELECT a.gid, a.cod_imovel, a.tp_imovel, a.nu_modulo_fiscal, a.ovlp_count, a.premium, a.geom,
b.gid as gid_b, b.cod_imovel as cod_imovel_b, b.tp_imovel as tp_imovel_b,
b.nu_modulo_fiscal as nu_modulo_fiscal_b,
b.ovlp_count as ovlp_count_b,
b.premium as premium_b, b.geom as geom_b
FROM car.malhacar_2021_proc2 a
LEFT JOIN car.malhacar_2021_proc2 b
ON a.gid <> b.gid
AND b.gid = ANY (a.ovlp_features)
WHERE a.ovlp_count <> 0
ORDER BY a.gid, b.gid
);
---------------------------------------------------------------------------------------
-- TABLE TO PROCESS OVERLAPS BASED ON CONSTRAINTS -------------------------------------
CREATE TABLE car.malhacar_2021_proc4 (
gid int,
cod_imovel text,
tp_imovel text,
nu_modulo_fiscal numeric,
ovlp_count int,
premium bool,
mod_geom int,
geom geometry,
gid_b int,
cod_imovel_b text
);
-- PARALELO:
--SELECT :var_proc proc;
--
--INSERT INTO car.malhacar_2021_proc4 (gid, cod_imovel, tp_imovel, nu_modulo_fiscal, ovlp_count, premium,
-- geom, mod_geom , gid_b, cod_imovel_b)
--SELECT gid, cod_imovel, tp_imovel, nu_modulo_fiscal, ovlp_count, premium,
-- CASE
-- WHEN tp_imovel = tp_imovel_b THEN
-- CASE
-- WHEN premium = premium_b THEN
-- CASE WHEN gid > gid_b THEN st_difference(geom, geom_b) ELSE geom END
-- WHEN premium IS FALSE AND premium_b IS TRUE THEN st_difference(geom, geom_b)
-- ELSE geom
-- END
-- ELSE
-- CASE
-- WHEN tp_imovel = 'PCT' THEN st_difference(geom, geom_b)
-- WHEN tp_imovel = 'AST' AND tp_imovel_b = 'IRU' THEN st_difference(geom, geom_b)
-- ELSE geom
-- END
-- END as geom,
-- CASE
-- WHEN tp_imovel = tp_imovel_b THEN
-- CASE
-- WHEN premium = premium_b THEN
-- CASE WHEN gid > gid_b THEN 1 ELSE 0 END
-- WHEN premium IS FALSE AND premium_b IS TRUE THEN 1
-- ELSE 0
-- END
-- ELSE
-- CASE
-- WHEN tp_imovel = 'PCT' THEN 1
-- WHEN tp_imovel = 'AST' AND tp_imovel_b = 'IRU' THEN 1
-- ELSE 0
-- END
-- END as mod_geom,
-- gid_b, cod_imovel_b
--FROM car.malhacar_2021_proc3
--WHERE (gid % 40) :var_proc;
---------------------------------------------------------------------------------------
-- TABLE TO SELECT ALL CARS THAT HAVE NO GEOMETRY MODIFICATIONS ----------------------
DROP TABLE car.malhacar_2021_proc5_nomod;
CREATE TABLE car.malhacar_2021_proc5_nomod AS (
WITH agg AS (
SELECT gid, cod_imovel, tp_imovel, nu_modulo_fiscal, ovlp_count, premium,
array_agg(mod_geom) mod_geoms, array_agg(geom) geoms
FROM car.malhacar_2021_proc4
GROUP BY gid, cod_imovel, tp_imovel, nu_modulo_fiscal, ovlp_count, premium
)
SELECT gid, cod_imovel, tp_imovel, nu_modulo_fiscal, ovlp_count, premium, geoms[1]::geometry as geom
FROM agg
JOIN LATERAL (SELECT sum(el) as soma FROM unnest(mod_geoms) as el) foo ON soma = 0
); -- 1.850.208
---------------------------------------------------------------------------------------
-- TABLE TO AGGREGATE ALL GEOMETRY MODIFICATIONS BY CAR -------------------------------
CREATE TABLE car.malhacar_2021_proc6 (
gid int,
cod_imovel text,
tp_imovel text,
nu_modulo_fiscal numeric,
ovlp_count int,
premium bool,
geom geometry
);
-- PARALELO:
--INSERT INTO car.malhacar_2021_proc6 (gid, cod_imovel, tp_imovel, nu_modulo_fiscal, ovlp_count, premium, geom)
--WITH agg AS (
-- SELECT gid, cod_imovel, tp_imovel, nu_modulo_fiscal, ovlp_count, premium,
-- array_agg(mod_geom) mod_geoms, array_agg(geom) geoms
-- FROM car.malhacar_2021_proc4
-- WHERE (gid % 40) = :var_proc
-- GROUP BY gid, cod_imovel, tp_imovel, nu_modulo_fiscal, ovlp_count, premium
-- ),
-- mods AS ( SELECT gid, cod_imovel, tp_imovel, nu_modulo_fiscal, ovlp_count, premium, geoms
-- FROM agg
-- JOIN LATERAL (SELECT sum(el) as soma FROM unnest(mod_geoms) as el) foo ON soma <> 0)
--
--SELECT gid, cod_imovel, tp_imovel, nu_modulo_fiscal, ovlp_count, premium,
-- st_intersects(geoms) -- custom function -> public.st_intersects(geometry [], int default 3)
--FROM mods;
ALTER TABLE car.malhacar_2021_proc6 ADD PRIMARY KEY(gid);
SELECT count(distinct gid) FROM car.malhacar_2021_proc6; --3.305.920
---------------------------------------------------------------------------------------
-- TABLE TO CONSOLIDATE THE RESULTS ---------------------------------------------------
CREATE TABLE car.malhacar_2021_proc7 AS (
-- SELECT count(distinct gid) FROM car.malhacar_2021_proc0_noovlp; --912.225
SELECT gid, cod_imovel, tp_imovel, nu_modulo_fiscal, 0::int as ovlp_count, premium, geom FROM car.malhacar_2021_proc0_noovlp
UNION
-- SELECT count(distinct gid) FROM car.malhacar_2021_proc5_nomod; --1.850.208
SELECT * FROM car.malhacar_2021_proc5_nomod
UNION
SELECT * FROM car.malhacar_2021_proc6 --3.305.920
); -- 6.068.353
ALTER TABLE car.malhacar_2021_proc7 ADD PRIMARY KEY (gid);
CREATE INDEX malhacar_2021_proc7_geom_idx ON car.malhacar_2021_proc7 USING gist(geom);
SELECT st_geometrytype(geom), count(*) FROM car.malhacar_2021_proc7 GROUP BY st_geometrytype(geom); -- 1 NULL GEOMETRY
SELECT st_isempty(geom), count(*) FROM car.malhacar_2021_proc7 GROUP BY st_isempty(geom);
--- FALSE 5.962.976
--- TRUE 105.376
--- NULL 1
-- TOTAL INPUT: 6.068.353
-- TOTAL OUTPUT: 6.068.353
-- TABLE TO VERIFY RESIDUAL -----------------------------------------------------------
CREATE TABLE car.malhacar_2021_proc8_final AS (
SELECT gid, cod_imovel, tp_imovel, nu_modulo_fiscal, ovlp_count, premium, foo.num_geom, geom
FROM car.malhacar_2021_proc7,
LATERAL ( SELECT count(geoms) AS num_geom FROM st_dump(geom) geoms) foo
);
---------------------------------------------------------------------------------------
-- -- CUSTOM FUNTION:
--CREATE OR REPLACE FUNCTION public.st_intersection(geoms geometry[], collection integer default 3)
-- RETURNS geometry
-- LANGUAGE plpgsql
--AS $function$
--DECLARE
-- i integer;
-- tmpGeom geometry;
--BEGIN
-- tmpGeom := geoms[1];
-- FOR i IN 1..array_length(geoms,1) LOOP
-- tmpGeom:= ST_CollectionExtract(ST_Intersection(tmpGeom,geoms[i]), collection);
-- END LOOP;
-- RETURN tmpGeom;
--END;
--$function$;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment