Skip to content

Instantly share code, notes, and snippets.

@Jawn78
Created February 13, 2018 17:57
Show Gist options
  • Select an option

  • Save Jawn78/bab736aeeb9c5b0f64d99e7aceeb1d98 to your computer and use it in GitHub Desktop.

Select an option

Save Jawn78/bab736aeeb9c5b0f64d99e7aceeb1d98 to your computer and use it in GitHub Desktop.

Revisions

  1. Jawn78 created this gist Feb 13, 2018.
    89 changes: 89 additions & 0 deletions gistfile1.txt
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,89 @@
    /*
    * To change this license header, choose License Headers in Project Properties.
    * To change this template file, choose Tools | Templates
    * and open the template in the editor.
    */
    package rex1nlp;
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.InputStream;
    import opennlp.tools.namefind.NameFinderME;
    import opennlp.tools.namefind.TokenNameFinderModel;
    import opennlp.tools.tokenize.TokenizerME;
    import opennlp.tools.tokenize.TokenizerModel;
    import opennlp.tools.util.Span;
    import org.apache.tika.metadata.Metadata;
    import org.apache.tika.parser.AutoDetectParser;
    import org.apache.tika.parser.ParseContext;
    import org.apache.tika.parser.Parser;
    import org.apache.tika.sax.BodyContentHandler;
    import org.xml.sax.ContentHandler;

    /**
     * Example program that extracts the text of a document with Apache Tika, tokenizes it
     * with an OpenNLP tokenizer model, prints every token, and then runs an OpenNLP name
     * finder (loaded from {@code en-ner-person.bin}) over the tokens.
     */
    public class rexNERex {

        /**
         * Parses the hard-coded document, prints one token per line to standard output and
         * runs the name finder over the resulting tokens.
         *
         * @param args command-line arguments; not used
         * @throws Exception if the document or one of the model files cannot be opened,
         *         read or parsed
         */
        public static void main(String args[]) throws Exception {
            // Document whose body text is extracted and analysed.
            String target = "C:\\Users\\RexPC\\Documents\\Haily.docx";

            File document = new File(target);
            Parser parser = new AutoDetectParser();
            ContentHandler handler = new BodyContentHandler();
            Metadata metadata = new Metadata();

            // The stream is opened here, so it must also be closed here; without the
            // try-with-resources block the file handle would stay open.
            try (InputStream documentStream = new FileInputStream(document)) {
                parser.parse(documentStream, handler, metadata, new ParseContext());
            }

            // System.out.println(handler);

            // Load the tokenizer model; the stream is only needed while the model is read.
            TokenizerModel tokenModel;
            try (InputStream inputStreamTokenizer = new FileInputStream(
                    "C:\\Users\\RexPC\\Documents\\Programming\\Apache OpenNLP\\Models\\Original OpenNLP Models\\en-token.bin")) {
                tokenModel = new TokenizerModel(inputStreamTokenizer);
            }

            // Instantiate the tokenizer from the loaded model.
            TokenizerME tokenizer = new TokenizerME(tokenModel);

            // Split the extracted document text into tokens and print one per line.
            String[] tokens = tokenizer.tokenize(handler.toString());
            for (String token : tokens) {
                System.out.println(token);
            }

            // Load the name-finder model, again closing the stream once the model is read.
            TokenNameFinderModel model;
            try (InputStream inputStreamNameFinder = new FileInputStream(
                    "C:\\Users\\RexPC\\Documents\\Programming\\Apache OpenNLP\\Models\\Original OpenNLP Models\\en-ner-person.bin")) {
                model = new TokenNameFinderModel(inputStreamNameFinder);
            }

            // Instantiate the name finder from the loaded model.
            NameFinderME nameFinder = new NameFinderME(model);

            // Find the name spans in the token array (indices refer to positions in tokens).
            Span[] nameSpans = nameFinder.find(tokens);

            // Printing of the detected spans is currently disabled:
            // for (Span s : nameSpans) {
            //     System.out.println(s.toString());
            // }

            // NOTE: an earlier, disabled experiment split the text into sentences with
            // SentenceDetectorME (model en-sent.bin) and passed the sentences to
            // nameFinder.find(...). It was removed because it was dead code that referenced
            // classes this file does not import.
        }
    }