Created
          October 9, 2019 14:59 
        
      - 
      
 - 
        
Save pebbie/a3c9795a20510f2765c3d707eaff4e5d to your computer and use it in GitHub Desktop.  
Revisions
- 
        
pebbie created this gist
Oct 9, 2019. There are no files selected for viewing.
"""
file: rdf2pandas.py
auth: Peb Ruswono Aryan
desc: import data in RDF Data Cube (assumed in particular shape) from Graph to Pandas DataFrame
"""
from rdflib import Graph, Namespace, RDF, RDFS
import pandas as pd

QB = Namespace('http://purl.org/linked-data/cube#')
DCT = Namespace('http://purl.org/dc/terms/')


def short_name(uristr: str) -> str:
    """Return the part of *uristr* that follows its last ``#`` or ``/``.

    The whole string is returned when it contains neither separator; an
    empty string is returned when the string ends with one of them.
    """
    # str.rfind gives -1 when the separator is absent, so "+ 1" yields 0 and
    # the slice keeps the full string in that case.
    cut = max(uristr.rfind('#'), uristr.rfind('/')) + 1
    return uristr[cut:]


def _column_label(g: Graph, comp) -> str:
    """Pick a column name for component property *comp*.

    Preference order: ``rdfs:label``, then ``dct:title``, then the short
    name derived from the component URI itself.
    """
    for pred in (RDFS.label, DCT.title):
        lbl = g.value(comp, pred)
        if lbl is not None:
            # Coerce to a plain str: an rdflib Literal only compares equal to
            # another Literal, so leaving it as-is would make df["name"]
            # lookups with ordinary strings fail.
            return str(lbl)
    return short_name(str(comp))


def from_graph(g: Graph) -> pd.DataFrame:
    """
    Import a DataFrame from an rdflib.Graph holding an RDF Data Cube.

    expects shape :
    ?ds RDF.type QB.DataSet
    ?ds QB.structure ?dsd
    ?dsd QB.component [QB.dimension ?dim] || ?dsd QB.component [QB.measure ?mea]
    ?obs QB.dataSet ?ds
    ?obs ?dim ?dimval || ?obs ?mea ?meaval

    Only the first dataset found, and the first structure definition of that
    dataset, are used. Columns are ordered with all dimensions first, then
    all measures. Each row corresponds to one observation of the dataset and
    each cell holds whatever ``g.value(obs, component)`` returns for it.

    Returns an empty DataFrame (no rows, no columns) when the graph has no
    QB.DataSet or the dataset has no QB.structure.
    """
    data = []
    columns = []

    # Take the first match only, mirroring "first dataset / first structure".
    ds = next(iter(g.subjects(RDF.type, QB.DataSet)), None)
    dsd = None
    if ds is not None:
        # Guarded because passing None as subject would act as a wildcard.
        dsd = next(iter(g.objects(ds, QB.structure)), None)

    if dsd is not None:
        dims = []
        meas = []
        for comp in g.objects(dsd, QB.component):
            dim = g.value(comp, QB.dimension)
            mea = g.value(comp, QB.measure)
            # A component declaring both is treated as a dimension.
            if dim is not None:
                dims.append(dim)
            elif mea is not None:
                meas.append(mea)

        comps = dims + meas
        columns = [_column_label(g, c) for c in comps]
        data = [
            [g.value(obs, c) for c in comps]
            for obs in g.subjects(QB.dataSet, ds)
        ]

    return pd.DataFrame(data, columns=columns)