Skip to content

Instantly share code, notes, and snippets.

@mickley
Created November 6, 2024 00:54
Show Gist options
  • Select an option

  • Save mickley/4308bd972c2581ab3dcc25bda7ae5232 to your computer and use it in GitHub Desktop.

Select an option

Save mickley/4308bd972c2581ab3dcc25bda7ae5232 to your computer and use it in GitHub Desktop.
OpenRefine cleaning steps used for VoucherVision data at the Oregon State University Herbarium
[
{
  "op": "core/text-transform",
  "description": "Text transform on cells in column genus using expression value.toTitlecase()",
  "engineConfig": {
    "facets": [],
    "mode": "row-based"
  },
  "columnName": "genus",
  "expression": "value.toTitlecase()",
  "onError": "keep-original",
  "repeat": false,
  "repeatCount": 10
},
{
  "op": "core/text-transform",
  "description": "Text transform on cells in column specificEpithet using expression value.toLowercase()",
  "engineConfig": {
    "facets": [],
    "mode": "row-based"
  },
  "columnName": "specificEpithet",
  "expression": "value.toLowercase()",
  "onError": "keep-original",
  "repeat": false,
  "repeatCount": 10
},
{
  "op": "core/text-transform",
  "description": "Text transform on cells in column infraspecificEpithet using expression value.toLowercase()",
  "engineConfig": {
    "facets": [],
    "mode": "row-based"
  },
  "columnName": "infraspecificEpithet",
  "expression": "value.toLowercase()",
  "onError": "keep-original",
  "repeat": false,
  "repeatCount": 10
},
{
  "op": "core/text-transform",
  "description": "Text transform on cells in column country using expression value.toTitlecase()",
  "engineConfig": {
    "facets": [],
    "mode": "row-based"
  },
  "columnName": "country",
  "expression": "value.toTitlecase()",
  "onError": "keep-original",
  "repeat": false,
  "repeatCount": 10
},
{
  "op": "core/text-transform",
  "description": "Text transform on cells in column stateProvince using expression value.toTitlecase()",
  "engineConfig": {
    "facets": [],
    "mode": "row-based"
  },
  "columnName": "stateProvince",
  "expression": "value.toTitlecase()",
  "onError": "keep-original",
  "repeat": false,
  "repeatCount": 10
},
{
  "op": "core/text-transform",
  "description": "Text transform on cells in column county using expression value.toTitlecase()",
  "engineConfig": {
    "facets": [],
    "mode": "row-based"
  },
  "columnName": "county",
  "expression": "value.toTitlecase()",
  "onError": "keep-original",
  "repeat": false,
  "repeatCount": 10
},
{
  "op": "core/text-transform",
  "description": "Text transform on cells in column county using expression grel:value.replace(/^(.+) Co(?:unty)?\\b.*$/, \"$1\")",
  "engineConfig": {
    "facets": [],
    "mode": "row-based"
  },
  "columnName": "county",
  "expression": "grel:value.replace(/^(.+) Co(?:unty)?\\b.*$/, \"$1\")",
  "onError": "keep-original",
  "repeat": false,
  "repeatCount": 10
},
{
  "op": "core/text-transform",
  "description": "Text transform on cells in column country using expression grel:value.replace(/(?i)^U\\.?S\\.?A\\b\\.?/, \"United States\")",
  "engineConfig": {
    "facets": [],
    "mode": "row-based"
  },
  "columnName": "country",
  "expression": "grel:value.replace(/(?i)^U\\.?S\\.?A\\b\\.?/, \"United States\")",
  "onError": "keep-original",
  "repeat": false,
  "repeatCount": 10
},
{
  "op": "core/column-addition",
  "description": "Create column establishmentMeans at index 20 based on column cultivated using expression grel:if(isBlank(value), 'wild collection', 'cultivated')",
  "engineConfig": {
    "facets": [],
    "mode": "row-based"
  },
  "baseColumnName": "cultivated",
  "newColumnName": "establishmentMeans",
  "columnInsertIndex": 20,
  "expression": "grel:if(isBlank(value), 'wild collection', 'cultivated')",
  "onError": "set-to-blank"
},
{
  "op": "core/text-transform",
  "description": "Text transform on cells in column collector using expression grel:if(value.contains(/^[^a-z]*$/), value.toTitlecase(),value)",
  "engineConfig": {
    "facets": [],
    "mode": "row-based"
  },
  "columnName": "collector",
  "expression": "grel:if(value.contains(/^[^a-z]*$/), value.toTitlecase(),value)",
  "onError": "keep-original",
  "repeat": false,
  "repeatCount": 10
},
{
  "op": "core/text-transform",
  "description": "Text transform on cells in column associatedCollectors using expression grel:if(value.contains(/^[^a-z]*$/),value.toTitlecase(),value)",
  "engineConfig": {
    "facets": [],
    "mode": "row-based"
  },
  "columnName": "associatedCollectors",
  "expression": "grel:if(value.contains(/^[^a-z]*$/),value.toTitlecase(),value)",
  "onError": "keep-original",
  "repeat": false,
  "repeatCount": 10
},
{
  "op": "core/column-addition",
  "description": "Create column basisOfRecord at index 24 based on column associatedSpecies using expression grel:\"PreservedSpecimen\"",
  "engineConfig": {
    "facets": [],
    "mode": "row-based"
  },
  "baseColumnName": "associatedSpecies",
  "newColumnName": "basisOfRecord",
  "columnInsertIndex": 24,
  "expression": "grel:\"PreservedSpecimen\"",
  "onError": "set-to-blank"
},
{
  "op": "core/column-rename",
  "description": "Rename column scientificName to vvSciName",
  "oldColumnName": "scientificName",
  "newColumnName": "vvSciName"
},
{
  "op": "core/column-addition",
  "description": "Create column scientificName at index 2 based on column vvSciName using expression join ([cells['genus'].value,cells['specificEpithet'].value,cells['infraspecificEpithet'].value],' ')",
  "engineConfig": {
    "facets": [],
    "mode": "row-based"
  },
  "baseColumnName": "vvSciName",
  "newColumnName": "scientificName",
  "columnInsertIndex": 2,
  "expression": "join ([cells['genus'].value,cells['specificEpithet'].value,cells['infraspecificEpithet'].value],' ')",
  "onError": "keep-original"
}
]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment