"""
file: rdf2pandas.py
auth: Peb Ruswono Aryan
desc: import data in RDF Data Cube (assumed in particular shape)
      from Graph to Pandas DataFrame
"""
from rdflib import Graph, Namespace, RDF, RDFS
import pandas as pd

QB = Namespace('http://purl.org/linked-data/cube#')
DCT = Namespace('http://purl.org/dc/terms/')


def short_name(uristr: str) -> str:
    """Return the part of *uristr* that follows its last ``#`` or ``/``.

    The whole string is returned when it contains neither separator, and an
    empty string is returned when a separator is the final character.
    """
    # str.rfind yields -1 when the separator is absent, so the slice then
    # starts at index 0 and the input comes back unchanged.
    last_sep = max(uristr.rfind('#'), uristr.rfind('/'))
    return uristr[last_sep + 1:]


def _column_label(g: Graph, component) -> str:
    """Choose a column name for *component*: rdfs:label, dct:title, else short name."""
    for predicate in (RDFS.label, DCT.title):
        label = g.value(component, predicate)
        if label is not None:
            # Labels come back as rdflib Literal objects, which do not compare
            # equal to plain strings; convert so every column name is a str,
            # matching the short_name() fallback below.
            return str(label)
    return short_name(str(component))


def from_graph(g: Graph) -> pd.DataFrame:
    """
    import DataFrame from rdflib.Graph

    expects shape :
        ?ds RDF.type QB.DataSet
        ?ds QB.structure ?dsd
        ?dsd QB.component [QB.dimension ?dim] || ?dsd QB.component [QB.measure ?mea]
        ?obs QB.dataSet ?ds
        ?obs ?dim ?dimval || ?obs ?mea ?meaval

    Only the first QB.DataSet found, and the first QB.structure attached to
    it, are used. Columns are ordered with all dimensions first, followed by
    all measures. Each column is named after the component's RDFS.label if
    present, otherwise its DCT.title, otherwise the short name of its URI;
    names are always plain ``str``.

    One row is produced per observation linked to the data set through
    QB.dataSet. Each cell holds whatever ``g.value(obs, component)`` returns
    for that observation (the rdflib term is stored as-is, not converted).

    Returns an empty DataFrame when the graph contains no QB.DataSet or the
    data set has no QB.structure.
    """
    ds = next(iter(g.subjects(RDF.type, QB.DataSet)), None)
    dsd = None if ds is None else next(iter(g.objects(ds, QB.structure)), None)
    if dsd is None:
        # Nothing matching the expected shape: same result as building a
        # frame from no rows and no columns.
        return pd.DataFrame([], columns=[])

    dims = []
    meas = []
    for comp in g.objects(dsd, QB.component):
        dim = g.value(comp, QB.dimension)
        if dim is not None:
            # A component that declares a dimension is treated as a dimension
            # even if it also declares a measure.
            dims.append(dim)
            continue
        mea = g.value(comp, QB.measure)
        if mea is not None:
            meas.append(mea)
    comps = dims + meas

    columns = [_column_label(g, comp) for comp in comps]
    data = [
        [g.value(obs, comp) for comp in comps]
        for obs in g.subjects(QB.dataSet, ds)
    ]
    return pd.DataFrame(data, columns=columns)