/** Sets the entry of `v` at the domain index of feature string `f` to 1.0. */
def addFeature(v: SparseIndexedTensor1, f: String): Unit = {
  v.update(ClassifierPosFeatureDomain.index(f), 1.0)
}

/** Adds the feature `prefix + f` to `v`, but only when `f` is a key of `w.ambiguityClasses`. */
def addLemma(v: SparseIndexedTensor1, w: WordData, f: String, prefix: String): Unit = {
  if (w.ambiguityClasses.contains(f)) addFeature(v, prefix + f)
}

/**
 * Looks up the ambiguity class recorded in `w.ambiguityClasses` for the lemma at `pos`.
 *
 * Returns the empty string when the lemma has no entry.
 *
 * This was previously declared with procedure syntax (no `=`), which made it return `Unit`
 * and turned every affinity feature into the constant "A=()".
 */
def getAffinity(sent: SentenceData, w: WordData, pos: Int) = {
  // NOTE(review): presumably sent.get tolerates positions outside the sentence — confirm.
  val lemma = sent.get(sent.lemmas, pos)
  if (w.ambiguityClasses.contains(lemma)) w.ambiguityClasses(lemma) else ""
}

/**
 * Builds the lemma feature for the token at offset `dif` from `pos`.
 *
 * The result is "W<dif>=<lemma>" when the lemma is a key of `w.ambiguityClasses`,
 * and the bare prefix "W<dif>=" otherwise.
 */
def getLemmaFeature(sent: SentenceData, w: WordData, pos: Int, dif: Int): String = {
  val prefix = "W" + dif + "="
  val lemma = sent.get(sent.lemmas, pos + dif)
  if (w.ambiguityClasses.contains(lemma)) prefix + lemma else prefix
}

/**
 * Populates `f` with the features for the token at position `pos` of `sent`.
 *
 * Features are lemma features in a +/-3 window, the labels of the three preceding tokens,
 * ambiguity-class ("affinity") features for the current and next three tokens, bigram and
 * trigram conjunctions of those, and a few surface features of the current lemma feature.
 *
 * The order in which features are added is kept stable, because `addFeature` looks each
 * string up in `ClassifierPosFeatureDomain` as it goes.
 */
def addFeatures(sent: SentenceData, pos: Int, f: SparseIndexedTensor1, w: WordData): Unit = {
  val wp3 = getLemmaFeature(sent, w, pos, +3)
  val wp2 = getLemmaFeature(sent, w, pos, +2)
  val wp1 = getLemmaFeature(sent, w, pos, +1)
  val wf = getLemmaFeature(sent, w, pos, 0)
  val wm1 = getLemmaFeature(sent, w, pos, -1)
  val wm2 = getLemmaFeature(sent, w, pos, -2)
  val wm3 = getLemmaFeature(sent, w, pos, -3)

  val pm3 = "POS-3=" + sent.get(sent.labels, pos - 3)
  val pm2 = "POS-2=" + sent.get(sent.labels, pos - 2)
  val pm1 = "POS-1=" + sent.get(sent.labels, pos - 1)

  val a0 = "A=" + getAffinity(sent, w, pos)
  val ap1 = "A+1=" + getAffinity(sent, w, pos + 1)
  val ap2 = "A+2=" + getAffinity(sent, w, pos + 2)
  val ap3 = "A+3=" + getAffinity(sent, w, pos + 3)

  // Unigram features. The original added ap2 twice and never added ap3.
  val unigrams = Seq(wp3, wp2, wp1, wf, wm1, wm2, wm3, pm3, pm2, pm1, a0, ap1, ap2, ap3)
  unigrams.foreach(addFeature(f, _))

  // Bigram conjunctions.
  val bigrams = Seq(
    wm2 + wm1, wm1 + wf, wf + wp1, wp1 + wp2, wm1 + wp1,
    pm2 + pm1, ap1 + ap2, pm1 + ap1, pm1 + a0, a0 + ap1
  )
  bigrams.foreach(addFeature(f, _))

  // Trigram conjunctions.
  val trigrams = Seq(
    wm2 + wm1 + wf, wm1 + wf + wp1, wf + wp1 + wp2, wm2 + wm1 + wp1, wm1 + wp1 + wp2,
    pm2 + pm1 + a0, pm1 + a0 + ap1, pm2 + pm1 + ap1, pm1 + ap1 + ap2, a0 + ap1 + ap2
  )
  trigrams.foreach(addFeature(f, _))

  // Surface features of the current token.
  // NOTE(review): wf is the lemma *feature* and always starts with "W0=", so PREFX3 is
  // always "W0=" and HasDigit is always true. Confirm whether the raw lemma was intended;
  // left unchanged here to avoid silently altering the feature set further.
  addFeature(f, "PREFX3=" + wf.take(3))
  addFeature(f, "SUFX4=" + wf.takeRight(4))
  addFeature(f, "Shape=" + strings.stringShape(wf, 2)) // TODO(apassos): add the remaining jinho features not contained in shape
  addFeature(f, "HasPeriod=" + wf.contains("."))
  addFeature(f, "HasDigit=" + wf.contains("0"))
  addFeature(f, "HasHyphen=" + wf.contains("-"))
}