Last active
August 2, 2022 19:07
-
-
Save peterdm/e1ebf6e17505e73ab10834d2458d60c9 to your computer and use it in GitHub Desktop.
Revisions
-
peterdm revised this gist
Aug 2, 2022. No changes. There are no files selected for viewing
-
peterdm created this gist
Aug 2, 2022. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@

# Kibana Dev Tools console script: add a custom BM25 similarity and a root-level
# `all_pages` field (fed via copy_to from the nested `pages.text`) to an
# existing index.
#
# Request order matters:
#   1. create the index
#   2. close it, register the similarity, (probe the closed index), reopen it
#   3. only then update the mapping that refers to the similarity by name
# A mapping cannot reference a similarity that the index settings do not define
# yet, and similarity settings are static, so they are applied while the index
# is closed.

# Original Index Structure (just took the pages field for brevity)
PUT test_index
{
  "settings": {
    "analysis": {
      "char_filter": {
        "punct_annotation": {
          "type": "mapping",
          "mappings": [
            ". => \\n_PERIOD_\\n",
            "\\n => \\n_NEWLINE_\\n",
            ", => \\n_COMMA_\\n",
            "; => \\n_SEMI_\\n"
          ]
        },
        "punctuation": {"type": "mapping", "mappings": [".=>"]}
      },
      "normalizer": {
        "keyword_normalizer": {"type": "custom", "char_filter": [], "filter": ["lowercase", "asciifolding"]}
      },
      "filter": {
        "english_stop": {"type": "stop", "stopwords": "_english_"},
        "english_stemmer": {"type": "stemmer", "language": "english"},
        "english_possessive_stemmer": {"type": "stemmer", "language": "possessive_english"}
      },
      "analyzer": {
        "cleanedStem": {
          "tokenizer": "standard",
          "filter": ["english_possessive_stemmer", "lowercase", "english_stop", "english_stemmer"],
          "char_filter": ["punct_annotation"],
          "type": "custom"
        },
        "exactMatch": {
          "tokenizer": "standard",
          "filter": ["lowercase"],
          "char_filter": ["punct_annotation"],
          "type": "custom"
        },
        "exactInput": {"tokenizer": "whitespace", "filter": ["lowercase"], "type": "custom"},
        "suggester_lowercase": {
          "tokenizer": "standard",
          "char_filter": ["punctuation"],
          "filter": ["lowercase", "asciifolding"]
        }
      }
    }
  },
  "mappings": {
    "_source": {"enabled": true},
    "dynamic": "false",
    "properties": {
      "pages": {
        "type": "nested",
        "properties": {
          "timestamp": {"type": "date", "store": true, "format": "strict_date_hour_minute_second_fraction"},
          "url": { "type": "keyword", "store": true },
          "type": { "type": "keyword", "store": true },
          "text": {
            "type": "text",
            "store": true,
            "index_options": "offsets",
            "fielddata": true,
            "fields": {
              "stemmed": {"type": "text", "analyzer": "cleanedStem", "index_options": "offsets"},
              "exact": {"type": "text", "analyzer": "exactMatch", "index_options": "offsets"}
            }
          }
        }
      }
    }
  }
}

# Inspect the current mapping of the field that will receive copy_to
GET test_index/_mapping/field/pages.text

# Inspect the settings before touching them
GET test_index/_settings

# Close index before changes
POST test_index/_close

# Register the custom similarity while the index is closed
PUT test_index/_settings
{
  "settings": {
    "index": {
      "similarity": {
        "spread_similarity": {
          "type": "BM25",
          "b": 1,
          "k1": 1.75
        }
      }
    }
  }
}

# Can writes happen?
# (probe only: a closed index is expected to reject this request)
POST test_index/_doc/1
{
  "pages": [
    {
      "text": ["Hello World!", "Is this index open?"]
    }
  ]
}

# Can reads happen
# (probe only: a closed index is expected to reject these requests)
GET test_index/_search
{
  "query": {
    "nested": {
      "path": "pages",
      "query": {
        "match_phrase": {
          "pages.text.stemmed": "hello world"
        }
      }
    }
  }
}

GET test_index/_search
{
  "query": {
    "match_phrase": {
      "all_pages.stemmed": "hello world"
    }
  }
}

# Reopen index after changes
POST test_index/_open

# Now that `spread_similarity` exists, add the root-level `all_pages` field and
# point the nested `pages.text` at it with copy_to. The existing parameters of
# `pages.text` are repeated unchanged; only `copy_to` is new.
PUT test_index/_mapping
{
  "properties": {
    "all_pages": {
      "type": "text",
      "similarity": "spread_similarity",
      "fields": {
        "exact": {
          "type": "text",
          "similarity": "spread_similarity",
          "index_options": "offsets",
          "analyzer": "exactMatch"
        },
        "stemmed": {
          "type": "text",
          "similarity": "spread_similarity",
          "index_options": "offsets",
          "analyzer": "cleanedStem"
        }
      }
    },
    "pages": {
      "type": "nested",
      "properties": {
        "text": {
          "type": "text",
          "store": true,
          "index_options": "offsets",
          "fielddata": true,
          "fields": {
            "stemmed": {"type": "text", "analyzer": "cleanedStem", "index_options": "offsets"},
            "exact": {"type": "text", "analyzer": "exactMatch", "index_options": "offsets"}
          },
          "copy_to": "all_pages"
        }
      }
    }
  }
}

# Clean up the probe document (a no-op if the write above was rejected)
DELETE test_index/_doc/1