Created
November 6, 2024 00:54
-
-
Save mickley/4308bd972c2581ab3dcc25bda7ae5232 to your computer and use it in GitHub Desktop.
OpenRefine cleaning steps used for VoucherVision data at the Oregon State University Herbarium
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| [ | |
| { | |
| "op": "core/text-transform", | |
| "engineConfig": { | |
| "facets": [], | |
| "mode": "row-based" | |
| }, | |
| "columnName": "genus", | |
| "expression": "value.toTitlecase()", | |
| "onError": "keep-original", | |
| "repeat": false, | |
| "repeatCount": 10, | |
| "description": "Text transform on cells in column genus using expression value.toTitlecase()" | |
| }, | |
| { | |
| "op": "core/text-transform", | |
| "engineConfig": { | |
| "facets": [], | |
| "mode": "row-based" | |
| }, | |
| "columnName": "specificEpithet", | |
| "expression": "value.toLowercase()", | |
| "onError": "keep-original", | |
| "repeat": false, | |
| "repeatCount": 10, | |
| "description": "Text transform on cells in column specificEpithet using expression value.toLowercase()" | |
| }, | |
| { | |
| "op": "core/text-transform", | |
| "engineConfig": { | |
| "facets": [], | |
| "mode": "row-based" | |
| }, | |
| "columnName": "infraspecificEpithet", | |
| "expression": "value.toLowercase()", | |
| "onError": "keep-original", | |
| "repeat": false, | |
| "repeatCount": 10, | |
| "description": "Text transform on cells in column infraspecificEpithet using expression value.toLowercase()" | |
| }, | |
| { | |
| "op": "core/text-transform", | |
| "engineConfig": { | |
| "facets": [], | |
| "mode": "row-based" | |
| }, | |
| "columnName": "country", | |
| "expression": "value.toTitlecase()", | |
| "onError": "keep-original", | |
| "repeat": false, | |
| "repeatCount": 10, | |
| "description": "Text transform on cells in column country using expression value.toTitlecase()" | |
| }, | |
| { | |
| "op": "core/text-transform", | |
| "engineConfig": { | |
| "facets": [], | |
| "mode": "row-based" | |
| }, | |
| "columnName": "stateProvince", | |
| "expression": "value.toTitlecase()", | |
| "onError": "keep-original", | |
| "repeat": false, | |
| "repeatCount": 10, | |
| "description": "Text transform on cells in column stateProvince using expression value.toTitlecase()" | |
| }, | |
| { | |
| "op": "core/text-transform", | |
| "engineConfig": { | |
| "facets": [], | |
| "mode": "row-based" | |
| }, | |
| "columnName": "county", | |
| "expression": "value.toTitlecase()", | |
| "onError": "keep-original", | |
| "repeat": false, | |
| "repeatCount": 10, | |
| "description": "Text transform on cells in column county using expression value.toTitlecase()" | |
| }, | |
| { | |
| "op": "core/text-transform", | |
| "engineConfig": { | |
| "facets": [], | |
| "mode": "row-based" | |
| }, | |
| "columnName": "county", | |
| "expression": "grel:value.replace(/^(.+) Co([\\.u].*)?$/, \"$1\")", | |
| "onError": "keep-original", | |
| "repeat": false, | |
| "repeatCount": 10, | |
| "description": "Text transform on cells in column county using expression grel:value.replace(/^(.+) Co([\\.u].*)?$/, \"$1\")" | |
| }, | |
| { | |
| "op": "core/text-transform", | |
| "engineConfig": { | |
| "facets": [], | |
| "mode": "row-based" | |
| }, | |
| "columnName": "country", | |
| "expression": "grel:value.replace(/(?i)^U\\.?S\\.?A\\.?(?![a-z])/, \"United States\")", | |
| "onError": "keep-original", | |
| "repeat": false, | |
| "repeatCount": 10, | |
| "description": "Text transform on cells in column country using expression grel:value.replace(/(?i)^U\\.?S\\.?A\\.?(?![a-z])/, \"United States\")" | |
| }, | |
| { | |
| "op": "core/column-addition", | |
| "engineConfig": { | |
| "facets": [], | |
| "mode": "row-based" | |
| }, | |
| "baseColumnName": "cultivated", | |
| "expression": "grel:if(isBlank(value), 'wild collection', 'cultivated')", | |
| "onError": "set-to-blank", | |
| "newColumnName": "establishmentMeans", | |
| "columnInsertIndex": 20, | |
| "description": "Create column establishmentMeans at index 20 based on column cultivated using expression grel:if(isBlank(value), 'wild collection', 'cultivated')" | |
| }, | |
| { | |
| "op": "core/text-transform", | |
| "engineConfig": { | |
| "facets": [], | |
| "mode": "row-based" | |
| }, | |
| "columnName": "collector", | |
| "expression": "grel:if(value.contains(/^[^a-z]*$/), value.toTitlecase(),value)", | |
| "onError": "keep-original", | |
| "repeat": false, | |
| "repeatCount": 10, | |
| "description": "Text transform on cells in column collector using expression grel:if(value.contains(/^[^a-z]*$/), value.toTitlecase(),value)" | |
| }, | |
| { | |
| "op": "core/text-transform", | |
| "engineConfig": { | |
| "facets": [], | |
| "mode": "row-based" | |
| }, | |
| "columnName": "associatedCollectors", | |
| "expression": "grel:if(value.contains(/^[^a-z]*$/),value.toTitlecase(),value)", | |
| "onError": "keep-original", | |
| "repeat": false, | |
| "repeatCount": 10, | |
| "description": "Text transform on cells in column associatedCollectors using expression grel:if(value.contains(/^[^a-z]*$/),value.toTitlecase(),value)" | |
| }, | |
| { | |
| "op": "core/column-addition", | |
| "engineConfig": { | |
| "facets": [], | |
| "mode": "row-based" | |
| }, | |
| "baseColumnName": "associatedSpecies", | |
| "expression": "grel:\"PreservedSpecimen\"", | |
| "onError": "set-to-blank", | |
| "newColumnName": "basisOfRecord", | |
| "columnInsertIndex": 24, | |
| "description": "Create column basisOfRecord at index 24 based on column associatedSpecies using expression grel:\"PreservedSpecimen\"" | |
| }, | |
| { | |
| "op": "core/column-rename", | |
| "oldColumnName": "scientificName", | |
| "newColumnName": "vvSciName", | |
| "description": "Rename column scientificName to vvSciName" | |
| }, | |
| { | |
| "op": "core/column-addition", | |
| "engineConfig": { | |
| "facets": [], | |
| "mode": "row-based" | |
| }, | |
| "baseColumnName": "vvSciName", | |
| "expression": "join ([cells['genus'].value,cells['specificEpithet'].value,cells['infraspecificEpithet'].value],' ')", | |
| "onError": "keep-original", | |
| "newColumnName": "scientificName", | |
| "columnInsertIndex": 2, | |
| "description": "Create column scientificName at index 2 based on column vvSciName using expression join ([cells['genus'].value,cells['specificEpithet'].value,cells['infraspecificEpithet'].value],' ')" | |
| } | |
| ] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment