How can I add an edge ngram analyzer to Elasticsearch? Q&A

How can I improve the search results by using an edge ngram analyzer on a property in Elasticsearch?

You can add a custom analyzer by updating ProductIndexConfiguration.BuildIndexDescriptor

/// <summary>
/// Extends the base index descriptor with a custom analyzer named
/// "custom_ngram_analyzer" that combines an edge n-gram tokenizer
/// ("custom_ngram_tokenizer") with the "lowercase" token filter.
/// </summary>
/// <param name="cultureInfo">Culture passed through to the base implementation.</param>
/// <param name="descriptor">Index descriptor passed through to the base implementation.</param>
/// <returns>The descriptor returned by the base implementation, with the analysis settings applied.</returns>
protected override CreateIndexDescriptor BuildIndexDescriptor(CultureInfo cultureInfo, CreateIndexDescriptor descriptor)
{
    const string AnalyzerName = "custom_ngram_analyzer";
    const string TokenizerName = "custom_ngram_tokenizer";

    var tokenFilters = new string[] { "lowercase" };
    var tokenChars = new TokenChar[] { TokenChar.Letter, TokenChar.Digit };

    var indexDescriptor = base.BuildIndexDescriptor(cultureInfo, descriptor);

    return indexDescriptor.Settings(settings => settings
        // Raised from the default of 1. Per the original author's note, index
        // creation throws if (MaxGram - MinGram) below is bigger than this value.
        .Setting(UpdatableIndexSettings.MaxNGramDiff, 3)
        .Analysis(analysis => analysis
            .Analyzers(analyzers => analyzers
                .Custom(AnalyzerName, custom => custom
                    .Tokenizer(TokenizerName)
                    .Filters(tokenFilters)))
            .Tokenizers(tokenizers => tokenizers
                .EdgeNGram(TokenizerName, edgeNGram => edgeNGram
                    .MinGram(2)
                    .MaxGram(5)
                    .TokenChars(tokenChars)))));
}

You can then use this analyzer by decorating a field in ProductDocument with
[Text(Analyzer = "custom_ngram_analyzer")]

And then use it in a text query
qc.Match(x => x.Field(z => z.MyProperty).Query(searchQuery.Text).Boost(10))

Fields used for aggregations (filters) or sorting need to be of the type Keyword. If you want to apply the edge ngram analyzer to the property Name, you can set it up as a multi-field and then reference the child field when querying.

/// <summary>
/// Maps the <c>Name</c> property as a text multi-field with two sub-fields:
/// "keyword" (a keyword field with IgnoreAbove set to 256) and "ngram"
/// (a text field analyzed with "custom_ngram_analyzer").
/// </summary>
/// <param name="cultureInfo">Culture passed through to the base implementation.</param>
/// <param name="descriptor">Type mapping descriptor to extend.</param>
/// <returns>The descriptor produced by the base implementation from the extended mapping.</returns>
protected override TypeMappingDescriptor<ProductDocument> BuildTypeMapDescriptor(CultureInfo cultureInfo, TypeMappingDescriptor<ProductDocument> descriptor)
{
    // Hand the descriptor returned by the fluent call to the base implementation
    // instead of parking it in an unused local, so the custom mapping is carried
    // forward without depending on the fluent API mutating its receiver.
    var mappedDescriptor = descriptor.Properties(properties => properties
        .Text(nameField => nameField
            .Name(n => n.Name)
            .Fields(subFields => subFields
                .Keyword(keyword => keyword
                    .Name("keyword")
                    .IgnoreAbove(256))
                .Text(ngram => ngram
                    .Name("ngram")
                    // Refers to an analyzer by name; it must be registered in the index settings.
                    .Analyzer("custom_ngram_analyzer")))));

    return base.BuildTypeMapDescriptor(cultureInfo, mappedDescriptor);
}

And then use .Suffix(...) in queries to target the "ngram" child field.

qc.Match(x => x.Field(z => z.Name.Suffix("ngram")).Query(searchQuery.Text).Boost(10))

2 Likes

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.

Due to changes in Litium 8.5.0 the analyzer is added slightly differently:

/// <summary>
/// Configures the product index: registers the "custom_ngram_analyzer"
/// (edge n-gram tokenizer plus "lowercase" filter) and maps <c>Name</c> as a
/// text multi-field with "keyword" and "ngram" sub-fields.
/// </summary>
/// <param name="cultureInfo">Culture the index is configured for (not used by this override).</param>
/// <param name="builder">Builder that receives the settings, analysis and mapping configuration.</param>
protected override void Configure(CultureInfo cultureInfo, IndexConfigurationBuilder<ProductDocument> builder)
{
	const string AnalyzerName = "custom_ngram_analyzer";
	const string TokenizerName = "custom_ngram_tokenizer";

	var tokenFilters = new string[] { "lowercase" };
	var tokenChars = new TokenChar[] { TokenChar.Letter, TokenChar.Digit };

	builder
		// Raised from the default of 1. Per the original author's note, an error is
		// thrown if (MaxGram - MinGram) below is bigger than this value.
		.Setting(UpdatableIndexSettings.MaxNGramDiff, 3)
		.Analysis(analysis => analysis
			.Analyzers(analyzers => analyzers
				.Custom(AnalyzerName, custom => custom
					.Tokenizer(TokenizerName)
					.Filters(tokenFilters)))
			.Tokenizers(tokenizers => tokenizers
				.EdgeNGram(TokenizerName, edgeNGram => edgeNGram
					.MinGram(2)
					.MaxGram(5)
					.TokenChars(tokenChars))))
		.Map(mapping => mapping
			.Properties(properties => properties
				.Text(nameField => nameField
					.Name(n => n.Name)
					.Fields(subFields => subFields
						.Keyword(keyword => keyword
							.Name("keyword")
							.IgnoreAbove(256))
						.Text(ngram => ngram
							.Name("ngram")
							.Analyzer(AnalyzerName))))));
}
1 Like