Skip to content

Instantly share code, notes, and snippets.

@purem
Created January 15, 2013 08:02
Show Gist options
  • Select an option

  • Save purem/4537084 to your computer and use it in GitHub Desktop.

Select an option

Save purem/4537084 to your computer and use it in GitHub Desktop.

Revisions

  1. purem created this gist Jan 15, 2013.
    154 changes: 154 additions & 0 deletions ngram_test.sh
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,154 @@
    # ========================================
    # Testing n-gram analysis in ElasticSearch
    # ========================================

    # Start from a clean slate: drop any existing "courses" index.
    curl --request DELETE 'localhost:9200/courses'

    # Recreate "courses" (1 shard, 1 replica) with a "course" type whose "name"
    # field is indexed through autocomplete_analyzer and searched through the
    # keyword analyzer.
    #
    # autocomplete_analyzer chain, in order:
    #   standard tokenizer -> asciifolding -> lowercase -> word_filter
    #   -> autocomplete_edgeNGram
    # where
    #   - word_filter is a word_delimiter filter with generate_word_parts,
    #     generate_number_parts, split_on_numerics and split_on_case_change
    #     disabled and preserve_original enabled;
    #   - autocomplete_edgeNGram is an edgeNGram filter (min_gram 2,
    #     max_gram 10, side "front").
    curl --request POST 'http://localhost:9200/courses' --data '{
    "mappings":{
    "course":{
    "properties":{
    "name":{
    "type":"string",
    "index_analyzer":"autocomplete_analyzer",
    "search_analyzer":"keyword"
    }
    }
    }
    },
    "settings":{
    "number_of_shards":1,
    "number_of_replicas":1,
    "analysis":{
    "filter":{
    "autocomplete_edgeNGram":{
    "type":"edgeNGram",
    "min_gram":2,
    "max_gram":10,
    "side":"front"
    },
    "word_filter":{
    "type":"word_delimiter",
    "generate_word_parts":false,
    "generate_number_parts":false,
    "split_on_numerics":false,
    "split_on_case_change":false,
    "preserve_original":true
    }
    },
    "analyzer":{
    "autocomplete_analyzer":{
    "tokenizer":"standard",
    "filter":[
    "asciifolding",
    "lowercase",
    "word_filter",
    "autocomplete_edgeNGram"
    ],
    "type":"custom"
    }
    }
    }
    }}'

    # Basic n-gram check: search the "name" field for the partial term "merc".
    # Expected outcome (per the author): the correct course is returned.
    # NOTE(review): the URL asks for size=10 while the body asks for "size":4 --
    # confirm which limit is intended.
    curl --request GET 'http://localhost:9200/courses/course/_search?load=true&size=10&pretty' --data '{
    "query":{
    "query_string":{
    "query":"merc",
    "fields":[
    "name"
    ]
    }
    },
    "size":4}'

    # Full-keyword check: search the "name" field for the complete word
    # "mercedes" (as opposed to the "merc" prefix used by the basic n-gram test).
    # Expected outcome (per the author): the correct course is returned.
    #
    # The query used to be "merc", a verbatim copy of the prefix test, so the
    # full-keyword case described here was never actually exercised.
    curl -X GET 'http://localhost:9200/courses/course/_search?load=true&size=10&pretty' -d '{
    "query":{
    "query_string":{
    "query":"mercedes",
    "fields":[
    "name"
    ]
    }
    },
    "size":4}'

    # Full subject written without the hyphen ("mercedes benz").
    # Expected outcome (per the author): the correct course is returned.
    curl --request GET 'http://localhost:9200/courses/course/_search?load=true&size=10&pretty' --data '{
    "query":{
    "query_string":{
    "query":"mercedes benz",
    "fields":[
    "name"
    ]
    }
    },
    "size":4}'


    # Full subject written with the hyphen ("mercedes-benz").
    # FAILS to get the correct course: the recorded response below has no hits.
    curl --request GET 'http://localhost:9200/courses/course/_search?load=true&size=10&pretty' --data '{
    "query":{
    "query_string":{
    "query":"mercedes-benz",
    "fields":[
    "name"
    ]
    }
    },
    "size":4}'

    # Recorded response:
    # {
    # "took" : 1,
    # "timed_out" : false,
    # "_shards" : {
    # "total" : 1,
    # "successful" : 1,
    # "failed" : 0
    # },
    # "hits" : {
    # "total" : 0,
    # "max_score" : null,
    # "hits" : [ ]
    # }
    # }%

    # Full subject written with the hyphen ("mercedes-benz"), second capture.
    # Gets an extra, unwanted result: the recorded response below returns the
    # Mercedes-Benz course plus an unrelated one.
    # NOTE(review): this request is identical to the hyphenated test that
    # recorded zero hits, yet the recorded response differs -- presumably it was
    # captured against a different analyzer/mapping configuration; confirm.
    curl --request GET 'http://localhost:9200/courses/course/_search?load=true&size=10&pretty' --data '{
    "query":{
    "query_string":{
    "query":"mercedes-benz",
    "fields":[
    "name"
    ]
    }
    },
    "size":4}'

    # Recorded response:
    # {
    # "took" : 1,
    # "timed_out" : false,
    # "_shards" : {
    # "total" : 1,
    # "successful" : 1,
    # "failed" : 0
    # },
    # "hits" : {
    # "total" : 2,
    # "max_score" : 0.3592204,
    # "hits" : [ {
    # "_index" : "courses",
    # "_type" : "course",
    # "_id" : "7",
    # "_score" : 0.3592204, "_source" : {"name":"Mercedes-Benz Driving School"}
    # }, {
    # "_index" : "courses",
    # "_type" : "course",
    # "_id" : "13",
    # "_score" : 0.3592204, "_source" : {"name":"Being Cool With Bond, James Bond"}
    # } ]
    # }
    # }%

    # Ask Elasticsearch to refresh the "courses" index.
    # NOTE(review): this runs after all of the searches, and no documents are
    # indexed anywhere in this script -- presumably the sample data is loaded
    # separately; confirm the intended ordering.
    curl --request POST 'http://localhost:9200/courses/_refresh'