Created
May 10, 2021 14:33
-
-
Save fjcerignoni/8c53cb28a969eae8f2023c0b06b37552 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| --===================================================================================== | |
| -- CLEANING THE CAR | |
| --===================================================================================== | |
| -- Author: Felipe Cerignoni | |
| -- Institution: Imaflora | |
| -- Last update: 10/05/2021 | |
| --===================================================================================== | |
| --------------------------------------------------------------------------------------- | |
| -- MATERIALIZED VIEW TO IDENTIFY ALL OVERLAPS------------------------------------------ | |
| CREATE MATERIALIZED VIEW car.pa_br_20210412_areaimovel_ovlps | |
| AS WITH car AS ( | |
| SELECT * FROM car.pa_br_20210412_areaimovel_original | |
| WHERE NOT cod_imovel IN ( SELECT cod_imovel | |
| FROM car.pa_br_20210412_poligonosexcluidos) | |
| ) | |
| SELECT a.fid as gid, | |
| a.cod_imovel, | |
| a.nu_modulo_fiscal, | |
| a.tp_imovel, | |
| a.geom, | |
| count(b.fid) AS ovlp_count, | |
| array_agg(b.fid) AS ovlp_features, | |
| array_agg(b.geom) AS ovlp_geoms | |
| FROM car a | |
| LEFT JOIN car.pa_br_20210412_areaimovel_original b | |
| ON a.fid <> b.fid | |
| AND st_intersects(a.geom, b.geom) | |
| AND NOT st_touches(a.geom, b.geom) | |
| GROUP BY a.fid, a.cod_imovel, a.nu_modulo_fiscal, a.tp_imovel, a.geom; | |
| --ALTER TABLE car.pa_br_20210412_areaimovel_ovlps RENAME COLUMN fid TO gid; | |
| -- TODO mudar a coluna fid para gid no script de download do CAR | |
| SELECT count(*) FROM car.pa_br_20210412_areaimovel_ovlps; -- 6.068.353 | |
| --------------------------------------------------------------------------------------- | |
| -- TABLE TO SELECT ALL CARS THAT DO NOT OVERLAP----------------------------------------- | |
| -- DROP TABLE car.malhacar_2021_proc0_noovlp; | |
| CREATE TABLE car.malhacar_2021_proc0_noovlp AS ( | |
| SELECT gid, cod_imovel, tp_imovel, nu_modulo_fiscal, true::bool as premium, geom | |
| FROM car.pa_br_20210412_areaimovel_ovlps | |
| WHERE ovlp_count = 0 | |
| ); -- 912.225 | |
| --------------------------------------------------------------------------------------- | |
| -- TABLE TO SELECT ALL CARS THAT HAVE OVERLAPS----------------------------------------- | |
| CREATE TABLE car.malhacar_2021_proc1 AS ( | |
| SELECT gid, cod_imovel, tp_imovel, nu_modulo_fiscal, ovlp_count, geom, ovlp_features, ovlp_geoms | |
| FROM car.pa_br_20210412_areaimovel_ovlps | |
| WHERE ovlp_count <> 0 | |
| ); -- 5.156.128 | |
| ALTER TABLE car.malhacar_2021_proc1 ADD PRIMARY KEY (gid); | |
| CREATE INDEX malhacar_2021_proc1_geom_idx ON car.malhacar_2021_proc1 USING gist(geom); | |
| --------------------------------------------------------------------------------------- | |
| -- TABLE TO DEFINE IF CAR IS PREMIUM OR POOR ------------------------------------------ | |
| DROP TABLE car.malhacar_2021_proc2 CASCADE; | |
| CREATE TABLE car.malhacar_2021_proc2 ( | |
| gid int, | |
| cod_imovel text, | |
| tp_imovel text, | |
| nu_modulo_fiscal numeric, | |
| ovlp_count int, | |
| area_orig numeric, | |
| area_final numeric, | |
| ovlp_ratio numeric, | |
| premium bool, | |
| geom geometry, | |
| geom_diff geometry, | |
| ovlp_features int[] | |
| ); | |
| -- PARALELO: | |
| --SELECT :var_proc proc; | |
| --INSERT INTO car.malhacar_2021_proc2 (gid, cod_imovel, tp_imovel, nu_modulo_fiscal, ovlp_count, area_orig, area_final, ovlp_ratio, premium, geom, geom_diff, ovlp_features) | |
| --WITH difference AS ( | |
| -- SELECT a.gid, a.cod_imovel, a.tp_imovel, a.nu_modulo_fiscal, a.ovlp_count, a.geom, | |
| -- st_difference(a.geom, st_union(d_geom)) as b_geom, ovlp_features | |
| -- FROM car.malhacar_2021_proc1 a, | |
| -- LATERAL (SELECT (st_dump(geom)).geom as d_geom FROM unnest(a.ovlp_geoms) as geom) foo | |
| -- WHERE (a.gid % 40) = :var_proc | |
| -- GROUP BY a.gid, a.cod_imovel, a.nu_modulo_fiscal, a.tp_imovel, a.ovlp_count, a.geom, ovlp_features | |
| -- | |
| -- ), | |
| -- areas AS ( | |
| -- SELECT gid, cod_imovel, tp_imovel, nu_modulo_fiscal, ovlp_count, | |
| -- st_area(st_transform(geom, 97823))/10000 as area_orig, | |
| -- st_area(st_transform(b_geom, 97823))/10000 as area_final, | |
| -- geom, b_geom, ovlp_features | |
| -- FROM difference | |
| -- ), | |
| -- ratio AS ( | |
| -- SELECT gid, cod_imovel, tp_imovel, nu_modulo_fiscal, ovlp_count, area_orig, area_final, | |
| -- area_final/area_orig as area_orig_ratio, geom, b_geom, ovlp_features | |
| -- FROM areas) | |
| -- | |
| --SELECT gid, cod_imovel, tp_imovel, nu_modulo_fiscal, ovlp_count, | |
| -- area_orig, area_final, area_orig_ratio, | |
| -- CASE | |
| -- WHEN nu_modulo_fiscal <= 4 AND ovlp_ratio >= 0.9 AND ovlp_count <= 3 THEN true | |
| -- WHEN nu_modulo_fiscal <= 15 AND ovlp_ratio >= 0.97 AND ovlp_count <= 5 THEN true | |
| -- WHEN nu_modulo_fiscal > 15 AND ovlp_ratio >= 0.95 AND ovlp_count <= 7 THEN true | |
| -- ELSE false | |
| -- END AS premium, | |
| -- geom, b_geom, ovlp_features | |
| --FROM ratio; | |
| ALTER TABLE car.malhacar_2021_proc2 ADD PRIMARY KEY(gid); | |
| SELECT count(distinct gid) FROM car.malhacar_2021_proc2; --5.156.128 | |
| --------------------------------------------------------------------------------------- | |
| -- TABLE TO COMPARE ALL OVELAPS ONE BY ONE -------------------------------------------- | |
| DROP TABLE car.malhacar_2021_proc3; | |
| CREATE TABLE car.malhacar_2021_proc3 AS ( | |
| SELECT a.gid, a.cod_imovel, a.tp_imovel, a.nu_modulo_fiscal, a.ovlp_count, a.premium, a.geom, | |
| b.gid as gid_b, b.cod_imovel as cod_imovel_b, b.tp_imovel as tp_imovel_b, | |
| b.nu_modulo_fiscal as nu_modulo_fiscal_b, | |
| b.ovlp_count as ovlp_count_b, | |
| b.premium as premium_b, b.geom as geom_b | |
| FROM car.malhacar_2021_proc2 a | |
| LEFT JOIN car.malhacar_2021_proc2 b | |
| ON a.gid <> b.gid | |
| AND b.gid = ANY (a.ovlp_features) | |
| WHERE a.ovlp_count <> 0 | |
| ORDER BY a.gid, b.gid | |
| ); | |
| --------------------------------------------------------------------------------------- | |
| -- TABLE TO PROCESS OVERLAPS BASED ON CONSTRAINTS ------------------------------------- | |
| CREATE TABLE car.malhacar_2021_proc4 ( | |
| gid int, | |
| cod_imovel text, | |
| tp_imovel text, | |
| nu_modulo_fiscal numeric, | |
| ovlp_count int, | |
| premium bool, | |
| mod_geom int, | |
| geom geometry, | |
| gid_b int, | |
| cod_imovel_b text | |
| ); | |
| -- PARALELO: | |
| --SELECT :var_proc proc; | |
| -- | |
| --INSERT INTO car.malhacar_2021_proc4 (gid, cod_imovel, tp_imovel, nu_modulo_fiscal, ovlp_count, premium, | |
| -- geom, mod_geom , gid_b, cod_imovel_b) | |
| --SELECT gid, cod_imovel, tp_imovel, nu_modulo_fiscal, ovlp_count, premium, | |
| -- CASE | |
| -- WHEN tp_imovel = tp_imovel_b THEN | |
| -- CASE | |
| -- WHEN premium = premium_b THEN | |
| -- CASE WHEN gid > gid_b THEN st_difference(geom, geom_b) ELSE geom END | |
| -- WHEN premium IS FALSE AND premium_b IS TRUE THEN st_difference(geom, geom_b) | |
| -- ELSE geom | |
| -- END | |
| -- ELSE | |
| -- CASE | |
| -- WHEN tp_imovel = 'PCT' THEN st_difference(geom, geom_b) | |
| -- WHEN tp_imovel = 'AST' AND tp_imovel_b = 'IRU' THEN st_difference(geom, geom_b) | |
| -- ELSE geom | |
| -- END | |
| -- END as geom, | |
| -- CASE | |
| -- WHEN tp_imovel = tp_imovel_b THEN | |
| -- CASE | |
| -- WHEN premium = premium_b THEN | |
| -- CASE WHEN gid > gid_b THEN 1 ELSE 0 END | |
| -- WHEN premium IS FALSE AND premium_b IS TRUE THEN 1 | |
| -- ELSE 0 | |
| -- END | |
| -- ELSE | |
| -- CASE | |
| -- WHEN tp_imovel = 'PCT' THEN 1 | |
| -- WHEN tp_imovel = 'AST' AND tp_imovel_b = 'IRU' THEN 1 | |
| -- ELSE 0 | |
| -- END | |
| -- END as mod_geom, | |
| -- gid_b, cod_imovel_b | |
| --FROM car.malhacar_2021_proc3 | |
| --WHERE (gid % 40) :var_proc; | |
| --------------------------------------------------------------------------------------- | |
| -- TABLE TO SELECT ALL CARS THAT HAVE NO GEOMETRY MODIFICATIONS ---------------------- | |
| DROP TABLE car.malhacar_2021_proc5_nomod; | |
| CREATE TABLE car.malhacar_2021_proc5_nomod AS ( | |
| WITH agg AS ( | |
| SELECT gid, cod_imovel, tp_imovel, nu_modulo_fiscal, ovlp_count, premium, | |
| array_agg(mod_geom) mod_geoms, array_agg(geom) geoms | |
| FROM car.malhacar_2021_proc4 | |
| GROUP BY gid, cod_imovel, tp_imovel, nu_modulo_fiscal, ovlp_count, premium | |
| ) | |
| SELECT gid, cod_imovel, tp_imovel, nu_modulo_fiscal, ovlp_count, premium, geoms[1]::geometry as geom | |
| FROM agg | |
| JOIN LATERAL (SELECT sum(el) as soma FROM unnest(mod_geoms) as el) foo ON soma = 0 | |
| ); -- 1.850.208 | |
| --------------------------------------------------------------------------------------- | |
| -- TABLE TO AGGREGATE ALL GEOMETRY MODIFICATIONS BY CAR ------------------------------- | |
| CREATE TABLE car.malhacar_2021_proc6 ( | |
| gid int, | |
| cod_imovel text, | |
| tp_imovel text, | |
| nu_modulo_fiscal numeric, | |
| ovlp_count int, | |
| premium bool, | |
| geom geometry | |
| ); | |
| -- PARALELO: | |
| --INSERT INTO car.malhacar_2021_proc6 (gid, cod_imovel, tp_imovel, nu_modulo_fiscal, ovlp_count, premium, geom) | |
| --WITH agg AS ( | |
| -- SELECT gid, cod_imovel, tp_imovel, nu_modulo_fiscal, ovlp_count, premium, | |
| -- array_agg(mod_geom) mod_geoms, array_agg(geom) geoms | |
| -- FROM car.malhacar_2021_proc4 | |
| -- WHERE (gid % 40) = :var_proc | |
| -- GROUP BY gid, cod_imovel, tp_imovel, nu_modulo_fiscal, ovlp_count, premium | |
| -- ), | |
| -- mods AS ( SELECT gid, cod_imovel, tp_imovel, nu_modulo_fiscal, ovlp_count, premium, geoms | |
| -- FROM agg | |
| -- JOIN LATERAL (SELECT sum(el) as soma FROM unnest(mod_geoms) as el) foo ON soma <> 0) | |
| -- | |
| --SELECT gid, cod_imovel, tp_imovel, nu_modulo_fiscal, ovlp_count, premium, | |
| -- st_intersects(geoms) -- custom function -> public.st_intersects(geometry [], int default 3) | |
| --FROM mods; | |
| ALTER TABLE car.malhacar_2021_proc6 ADD PRIMARY KEY(gid); | |
| SELECT count(distinct gid) FROM car.malhacar_2021_proc6; --3.305.920 | |
| --------------------------------------------------------------------------------------- | |
| -- TABLE TO CONSOLIDATE THE RESULTS --------------------------------------------------- | |
| CREATE TABLE car.malhacar_2021_proc7 AS ( | |
| -- SELECT count(distinct gid) FROM car.malhacar_2021_proc0_noovlp; --912.225 | |
| SELECT gid, cod_imovel, tp_imovel, nu_modulo_fiscal, 0::int as ovlp_count, premium, geom FROM car.malhacar_2021_proc0_noovlp | |
| UNION | |
| -- SELECT count(distinct gid) FROM car.malhacar_2021_proc5_nomod; --1.850.208 | |
| SELECT * FROM car.malhacar_2021_proc5_nomod | |
| UNION | |
| SELECT * FROM car.malhacar_2021_proc6 --3.305.920 | |
| ); -- 6.068.353 | |
| ALTER TABLE car.malhacar_2021_proc7 ADD PRIMARY KEY (gid); | |
| CREATE INDEX malhacar_2021_proc7_geom_idx ON car.malhacar_2021_proc7 USING gist(geom); | |
| SELECT st_geometrytype(geom), count(*) FROM car.malhacar_2021_proc7 GROUP BY st_geometrytype(geom); -- 1 NULL GEOMETRY | |
| SELECT st_isempty(geom), count(*) FROM car.malhacar_2021_proc7 GROUP BY st_isempty(geom); | |
| --- FALSE 5.962.976 | |
| --- TRUE 105.376 | |
| --- NULL 1 | |
| -- TOTAL INPUT: 6.068.353 | |
| -- TOTAL OUTPUT: 6.068.353 | |
| -- TABLE TO VERIFY RESIDUAL ----------------------------------------------------------- | |
| CREATE TABLE car.malhacar_2021_proc8_final AS ( | |
| SELECT gid, cod_imovel, tp_imovel, nu_modulo_fiscal, ovlp_count, premium, foo.num_geom, geom | |
| FROM car.malhacar_2021_proc7, | |
| LATERAL ( SELECT count(geoms) AS num_geom FROM st_dump(geom) geoms) foo | |
| ); | |
| --------------------------------------------------------------------------------------- | |
| -- -- CUSTOM FUNTION: | |
| --CREATE OR REPLACE FUNCTION public.st_intersection(geoms geometry[], collection integer default 3) | |
| -- RETURNS geometry | |
| -- LANGUAGE plpgsql | |
| --AS $function$ | |
| --DECLARE | |
| -- i integer; | |
| -- tmpGeom geometry; | |
| --BEGIN | |
| -- tmpGeom := geoms[1]; | |
| -- FOR i IN 1..array_length(geoms,1) LOOP | |
| -- tmpGeom:= ST_CollectionExtract(ST_Intersection(tmpGeom,geoms[i]), collection); | |
| -- END LOOP; | |
| -- RETURN tmpGeom; | |
| --END; | |
| --$function$; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment