Couchbase Server (a.k.a. Couchbase) is an open-source, NoSQL, document-oriented database that stores JSON documents. Couchbase has recently introduced a new feature: Full Text Search (FTS). This feature is currently a Developer Preview (as of Aug 24, 2017). Couchbase has provided decent documentation for FTS, which can be accessed here.
Couchbase stores all JSON documents in a bucket. Using N1QL, you can query these documents; N1QL queries look much like SQL statements, but operate on JSON. With FTS, on the other hand, you can use the following powerful query types:
In order to use the FTS feature, we need to make our bucket searchable by defining a search index. This can be done using the Couchbase Web Console or by executing a cURL command. In this example, we will create multiple indexes based on a JSON field: type. You can use any field from your JSON documents; it is even better if that field is present in every document.
To create a search index for “user”, we will perform the following steps:
Since the Couchbase FTS feature is currently (as of Aug 24, 2017) "experimental", its functionality may change in the future.
P.S. Click here to access my other posts.
Couchbase stores all JSON documents in a bucket. Using N1QL, you can query these documents; N1QL queries look much like SQL statements, but operate on JSON. With FTS, on the other hand, you can use the following powerful query types:
- Term, Phrase, Match, Match Phrase, Prefix
- Conjunction, Disjunction, Boolean
- Numeric and Date Ranges
- Query String
In order to use the FTS feature, we need to make our bucket searchable by defining a search index. This can be done using the Couchbase Web Console or by executing a cURL command. In this example, we will create multiple indexes based on a JSON field: type. You can use any field from your JSON documents; it is even better if that field is present in every document.
To create a search index for “user”, we will perform the following steps:
- Login to Couchbase Web Console with admin credentials
- Select Indexes >> Full Text >> New Full Text Index
- Provide name of the index: fts-user-index
- Select the bucket
- Select JSON type field as Type Identifier and provide field name: type
- Select Type Mappings >> Add Type Mapping
- Enter name as: user
- The dropdown value should be “inherit”, the “enabled” checkbox should be checked, and the “only index specified fields” checkbox should be unchecked
- Click OK to create type mapping for user
- Go to “default” type mapping and click on edit
- Uncheck enabled check box and click OK
- Click on “Create Index” button
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.sample.fts;

import java.util.ArrayList;
import java.util.List;

import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.Validate;

import com.couchbase.client.java.Bucket;
import com.couchbase.client.java.search.SearchQuery;
import com.couchbase.client.java.search.queries.AbstractFtsQuery;
import com.couchbase.client.java.search.queries.BooleanQuery;
import com.couchbase.client.java.search.queries.ConjunctionQuery;
import com.couchbase.client.java.search.queries.DisjunctionQuery;
import com.couchbase.client.java.search.queries.MatchQuery;
import com.couchbase.client.java.search.result.SearchQueryResult;
/** | |
* This class provides various user friendly static methods to query Couchbase | |
* with FTS (Full-Text-Search) feature. | |
* | |
* @author Sagar Chaudhari | |
*/ | |
public class CouchbaseFullTextService { | |
private static final String FIELD_SEPARATOR = ":"; | |
private static final String TERM_TRUE = "T"; | |
private static final String TERM_FALSE = "F"; | |
private CouchbaseFullTextService() { | |
// Private Constructor | |
} | |
/*- | |
* <p> Find records with one or more matching texts.</p> | |
* | |
* <p>Examples:</p> | |
* <pre> | |
* CouchbaseFullTextService.findByMatchingTexts(bucket, INDEX_NAME, 0, false, "attributes.attr1:attr", "attributes.attr:sample"); | |
* CouchbaseFullTextService.findByMatchingTexts(bucket, INDEX_NAME, 0, true, "attributes.attr1:attr", "attributes.attr:sample"); | |
* CouchbaseFullTextService.findByMatchingTexts(bucket, INDEX_NAME, 0, false, "attributes.attr1:attr", "sample"); | |
* CouchbaseFullTextService.findByMatchingTexts(bucket, INDEX_NAME, 0, true, "attributes.attr1:attr", "sample"); | |
* CouchbaseFullTextService.findByMatchingTexts(bucket, INDEX_NAME, 0, false, "sample"); | |
* CouchbaseFullTextService.findByMatchingTexts(bucket, INDEX_NAME, 0, true, "sample"); | |
* CouchbaseFullTextService.findByMatchingTexts(bucket, INDEX_NAME, 2, false, "sample"); | |
* CouchbaseFullTextService.findByMatchingTexts(bucket, INDEX_NAME, 2, true, "sample"); | |
* </pre> | |
* | |
* @param bucket the bucket name | |
* @param searchIndex the FTS (Full-Text-Search) index name | |
* @param fuzziness the fuzziness (default 0) | |
* @param matchAll if true, then return results which has all the matching searchTexts; if false, then return results which has at any matching searchTexts; default false | |
* @param searchTexts one or more search texts | |
* @return SearchQueryResult which contains document ids and hit locations | |
*/ | |
public static SearchQueryResult findByMatchingTexts(Bucket bucket, String searchIndex, int fuzziness, boolean matchAll, String... searchTexts) { | |
Validate.notEmpty(searchTexts, "Value of %s cannot be null or empty", "searchTexts"); | |
List<AbstractFtsQuery> queries = new ArrayList<AbstractFtsQuery>(); | |
for (String searchText : searchTexts) { | |
String[] searchTextArr = StringUtils.split(searchText, FIELD_SEPARATOR, 2); | |
if (searchTextArr.length == 2) { | |
queries.add(SearchQuery.match(searchTextArr[1]).field(searchTextArr[0]).fuzziness(fuzziness)); | |
} else { | |
queries.add(SearchQuery.match(searchText).fuzziness(fuzziness)); | |
} | |
} | |
AbstractFtsQuery[] abstractFtsQueries = new AbstractFtsQuery[queries.size()]; | |
abstractFtsQueries = queries.toArray(abstractFtsQueries); | |
if (matchAll) { | |
return bucket.query(new SearchQuery(searchIndex, new ConjunctionQuery(abstractFtsQueries))); | |
} else { | |
return bucket.query(new SearchQuery(searchIndex, new DisjunctionQuery(abstractFtsQueries))); | |
} | |
} | |
/*- | |
* <p> Find records with one or more matching phrases.</p> | |
* | |
* <p>Examples:</p> | |
* <pre> | |
* CouchbaseFullTextService.findByMatchingPhrases(bucket, INDEX_NAME, false, "attributes.attr1:sample attr", "attributes.attr:another value"); | |
* CouchbaseFullTextService.findByMatchingPhrases(bucket, INDEX_NAME, true, "attributes.attr1:sample attr", "attributes.attr:another value"); | |
* CouchbaseFullTextService.findByMatchingPhrases(bucket, INDEX_NAME, false, "attributes.attr1:sample attr", "another value"); | |
* CouchbaseFullTextService.findByMatchingPhrases(bucket, INDEX_NAME, true, "attributes.attr1:sample attr", "another value"); | |
* CouchbaseFullTextService.findByMatchingPhrases(bucket, INDEX_NAME, false, "sample attr"); | |
* CouchbaseFullTextService.findByMatchingPhrases(bucket, INDEX_NAME, true, "sample attr"); | |
* </pre> | |
* | |
* @param bucket the bucket name | |
* @param searchIndex the FTS (Full-Text-Search) index name | |
* @param matchAll if true, then return results which has all the matching searchTexts; if false, then return results which has at any matching searchTexts; default false | |
* @param searchPhrases one or more search phrases | |
* @return SearchQueryResult which contains document ids and hit locations | |
*/ | |
public static SearchQueryResult findByMatchingPhrases(Bucket bucket, String searchIndex, boolean matchAll, String... searchPhrases) { | |
Validate.notEmpty(searchPhrases, "Value of %s cannot be null or empty", "searchPhrases"); | |
List<AbstractFtsQuery> queries = new ArrayList<AbstractFtsQuery>(); | |
for (String searchPhrase : searchPhrases) { | |
String[] searchPhraseArr = StringUtils.split(searchPhrase, FIELD_SEPARATOR, 2); | |
if (searchPhraseArr.length == 2) { | |
queries.add(SearchQuery.matchPhrase(searchPhraseArr[1]).field(searchPhraseArr[0])); | |
} else { | |
queries.add(SearchQuery.matchPhrase(searchPhrase)); | |
} | |
} | |
AbstractFtsQuery[] abstractFtsQueries = new AbstractFtsQuery[queries.size()]; | |
abstractFtsQueries = queries.toArray(abstractFtsQueries); | |
if (matchAll) { | |
return bucket.query(new SearchQuery(searchIndex, new ConjunctionQuery(abstractFtsQueries))); | |
} else { | |
return bucket.query(new SearchQuery(searchIndex, new DisjunctionQuery(abstractFtsQueries))); | |
} | |
} | |
/*- | |
* <p> Find records by regular expression.</p> | |
* | |
* <p>Examples:</p> | |
* <pre> | |
* CouchbaseFullTextService.findByRegularExpression(bucket, INDEX_NAME, "[a-z]*\\s*attr"); | |
* </pre> | |
* | |
* @param bucket the bucket name | |
* @param searchIndex the FTS (Full-Text-Search) index name | |
* @param searchExpression the regular expression | |
* @return SearchQueryResult which contains document ids and hit locations | |
*/ | |
public static SearchQueryResult findByRegularExpression(Bucket bucket, String searchIndex, String searchExpression) { | |
Validate.notEmpty(searchExpression, "Value of %s cannot be null or empty", "searchExpression"); | |
return bucket.query(new SearchQuery(searchIndex, SearchQuery.regexp(searchExpression))); | |
} | |
/*- | |
* <p> Find records by prefix i.e. any word starting with.</p> | |
* | |
* <p>Examples:</p> | |
* <pre> | |
* CouchbaseFullTextService.findByPrefix(bucket, INDEX_NAME, "sample"); | |
* </pre> | |
* | |
* @param bucket the bucket name | |
* @param searchIndex the FTS (Full-Text-Search) index name | |
* @param searchPrefix the search prefix | |
* @return SearchQueryResult which contains document ids and hit locations | |
*/ | |
public static SearchQueryResult findByPrefix(Bucket bucket, String searchIndex, String searchPrefix) { | |
Validate.notEmpty(searchPrefix, "Value of %s cannot be null or empty", "searchPrefix"); | |
return bucket.query(new SearchQuery(searchIndex, SearchQuery.prefix(searchPrefix))); | |
} | |
/*- | |
* <p> Find records by text with wild cards (* or ?)</p> | |
* | |
* <p>Examples:</p> | |
* <pre> | |
* CouchbaseFullTextService.findByWildcard(bucket, INDEX_NAME, "sample*"); | |
* </pre> | |
* | |
* @param bucket the bucket name | |
* @param searchIndex the FTS (Full-Text-Search) index name | |
* @param searchWildcard the search text with wild card (* or ?) | |
* @return SearchQueryResult which contains document ids and hit locations | |
*/ | |
public static SearchQueryResult findByWildcard(Bucket bucket, String searchIndex, String searchWildcard) { | |
Validate.notEmpty(searchWildcard, "Value of %s cannot be null or empty", "searchWildcard"); | |
return bucket.query(new SearchQuery(searchIndex, SearchQuery.wildcard(searchWildcard))); | |
} | |
/*- | |
* <p> Find records by boolean value</p> | |
* | |
* <p>Examples:</p> | |
* <pre> | |
* CouchbaseFullTextService.findByBoolean(bucket, INDEX_NAME, "isActive", false); | |
* CouchbaseFullTextService.findByBoolean(bucket, INDEX_NAME, "isActive", true); | |
* CouchbaseFullTextService.findByBoolean(bucket, INDEX_NAME, null, true); | |
* CouchbaseFullTextService.findByBoolean(bucket, INDEX_NAME, "", true); | |
* </pre> | |
* | |
* @param bucket the bucket name | |
* @param searchIndex the FTS (Full-Text-Search) index name | |
* @param field the field name (ignore if null or empty) | |
* @param searchValue search value true/false | |
* @return SearchQueryResult which contains document ids and hit locations (locations will be empty in this case) | |
*/ | |
public static SearchQueryResult findByBoolean(Bucket bucket, String searchIndex, String field, boolean searchValue) { | |
/*- | |
* NOTE: Looks like there is a bug with .booleanField implementation. | |
* It throws exception: com.couchbase.client.java.error.FtsMalformedRequestException: FTS request is malformed. | |
* So, .booleanField syntax cannot be used. Commenting. | |
* | |
* Work around is to use .term instead. Internally boolean values are stored as T/F. | |
* See below. | |
*/ | |
//return bucket.query(new SearchQuery(searchIndex, SearchQuery.booleanField(searchValue).field(field))); | |
return bucket.query(new SearchQuery(searchIndex, SearchQuery.term(searchValue ? TERM_TRUE : TERM_FALSE).field(field))); | |
} | |
/*- | |
* <p>Find records by various combinations</p> | |
* | |
* <p>Examples:</p> | |
* <pre> | |
* CouchbaseFullTextService.findByMatchingNonMatchingTexts(bucket, INDEX_NAME, new String[] {"attributes.supporterId:RegressionSupporter1"}, new String[] {"attributes.attr:attr"}, new String[] {"sample"}); | |
* </pre> | |
* | |
* @param bucket the bucket name | |
* @param searchIndex the FTS (Full-Text-Search) index name | |
* @param searchMustTexts search text(s) which must match | |
* @param searchMustNotTexts search text(s) which must not match | |
* @param searchShouldTexts search text(s) which should be available (may or may not be available in the response) | |
* @return SearchQueryResult which contains document ids and hit locations | |
*/ | |
public static SearchQueryResult findByMatchingNonMatchingTexts(Bucket bucket, String searchIndex, String[] searchMustTexts, String[] searchMustNotTexts, String[] searchShouldTexts) { | |
BooleanQuery query = SearchQuery.booleans(); | |
List<AbstractFtsQuery> mustQueries = new ArrayList<AbstractFtsQuery>(); | |
List<AbstractFtsQuery> mustNotQueries = new ArrayList<AbstractFtsQuery>(); | |
List<AbstractFtsQuery> shouldQueries = new ArrayList<AbstractFtsQuery>(); | |
if (searchMustTexts != null && searchMustTexts.length > 0) { | |
for (String searchMustText : searchMustTexts) { | |
String[] searchMustTextArr = StringUtils.split(searchMustText, FIELD_SEPARATOR, 2); | |
if (searchMustTextArr.length == 2) { | |
mustQueries.add(SearchQuery.match(searchMustTextArr[1]).field(searchMustTextArr[0])); | |
} else { | |
mustQueries.add(SearchQuery.match(searchMustText)); | |
} | |
} | |
AbstractFtsQuery[] abstractFtsQueries = new AbstractFtsQuery[mustQueries.size()]; | |
abstractFtsQueries = mustQueries.toArray(abstractFtsQueries); | |
query = query.must(abstractFtsQueries); | |
} | |
if (searchMustNotTexts != null && searchMustNotTexts.length > 0) { | |
for (String searchMustNotText : searchMustNotTexts) { | |
String[] searchMustNotTextArr = StringUtils.split(searchMustNotText, FIELD_SEPARATOR, 2); | |
if (searchMustNotTextArr.length == 2) { | |
mustNotQueries.add(SearchQuery.match(searchMustNotTextArr[1]).field(searchMustNotTextArr[0])); | |
} else { | |
mustNotQueries.add(SearchQuery.match(searchMustNotText)); | |
} | |
} | |
AbstractFtsQuery[] abstractFtsQueries = new AbstractFtsQuery[mustNotQueries.size()]; | |
abstractFtsQueries = mustNotQueries.toArray(abstractFtsQueries); | |
query = query.mustNot(abstractFtsQueries); | |
} | |
if (searchShouldTexts != null && searchShouldTexts.length > 0) { | |
for (String searchShouldText : searchShouldTexts) { | |
String[] searchShouldTextArr = StringUtils.split(searchShouldText, FIELD_SEPARATOR, 2); | |
if (searchShouldTextArr.length == 2) { | |
shouldQueries.add(SearchQuery.match(searchShouldTextArr[1]).field(searchShouldTextArr[0])); | |
} else { | |
shouldQueries.add(SearchQuery.match(searchShouldText)); | |
} | |
} | |
AbstractFtsQuery[] abstractFtsQueries = new AbstractFtsQuery[shouldQueries.size()]; | |
abstractFtsQueries = shouldQueries.toArray(abstractFtsQueries); | |
query = query.should(abstractFtsQueries); | |
} | |
return bucket.query(new SearchQuery(searchIndex, query)); | |
} | |
/*- | |
* <p>Find records by specified numeric values</p> | |
* | |
* <p>Examples:</p> | |
* <pre> | |
* SearchQueryResult result = CouchbaseFullTextService.findByNumberRange(bucket, INDEX_NAME, null, 5, false, 10, false); | |
* SearchQueryResult result = CouchbaseFullTextService.findByNumberRange(bucket, INDEX_NAME, null, 5, false, 10, true); | |
* SearchQueryResult result = CouchbaseFullTextService.findByNumberRange(bucket, INDEX_NAME, "", 5, false, 10, false); | |
* SearchQueryResult result = CouchbaseFullTextService.findByNumberRange(bucket, INDEX_NAME, "", 5, false, 10, true); | |
* SearchQueryResult result = CouchbaseFullTextService.findByNumberRange(bucket, INDEX_NAME, "attributes.attr", 5, false, 10, false); | |
* SearchQueryResult result = CouchbaseFullTextService.findByNumberRange(bucket, INDEX_NAME, "attributes.attr", 5, true, 10, true); | |
* </pre> | |
* | |
* @param bucket the bucket name | |
* @param searchIndex the FTS (Full-Text-Search) index name | |
* @param field the field name (ignore if null or empty) | |
* @param min minimum numeric value to search | |
* @param minInclusive whether to include min value in search results | |
* @param max maximum numeric value to search | |
* @param maxInclusive whether to include max value in search results | |
* @return SearchQueryResult which contains document ids and hit locations (locations will be empty in this case) | |
*/ | |
public static SearchQueryResult findByNumberRange(Bucket bucket, String searchIndex, String field, double min, boolean minInclusive, double max, boolean maxInclusive) { | |
return bucket.query(new SearchQuery(searchIndex, SearchQuery.numericRange().min(min, minInclusive).max(max, maxInclusive).field(field))); | |
} | |
/*- | |
* <p>Find records by specified date values</p> | |
* | |
* <p>Examples:</p> | |
* <pre> | |
* SearchQueryResult result = CouchbaseFullTextService.findByDateRange(bucket, INDEX_NAME, "", "yyyy-MM-dd'T'hh:mm:ss.SSS'Z'", "2017-08-04T06:17:31.460Z", false, "", false); | |
* SearchQueryResult result = CouchbaseFullTextService.findByDateRange(bucket, INDEX_NAME, "", "yyyy-MM-dd'T'hh:mm:ss.SSS'Z'", "2017-08-04T06:17:31.460Z", true, "", false); | |
* SearchQueryResult result = CouchbaseFullTextService.findByDateRange(bucket, INDEX_NAME, "", "yyyy-MM-dd'T'hh:mm:ss.SSS'Z'", "", false, "2017-08-04T06:17:31.460Z", false); | |
* SearchQueryResult result = CouchbaseFullTextService.findByDateRange(bucket, INDEX_NAME, "", "yyyy-MM-dd'T'hh:mm:ss.SSS'Z'", "", false, "2017-08-04T06:17:31.460Z", true); | |
* SearchQueryResult result = CouchbaseFullTextService.findByDateRange(bucket, INDEX_NAME, "", "yyyy-MM-dd'T'hh:mm:ss.SSS'Z'", "2017-08-03T06:17:31.460Z", false, "2017-08-04T06:17:31.460Z", false); | |
* SearchQueryResult result = CouchbaseFullTextService.findByDateRange(bucket, INDEX_NAME, "", "yyyy-MM-dd'T'hh:mm:ss.SSS'Z'", "2017-08-03T06:17:31.460Z", true, "2017-08-04T06:17:31.460Z", false); | |
* SearchQueryResult result = CouchbaseFullTextService.findByDateRange(bucket, INDEX_NAME, "", "yyyy-MM-dd'T'hh:mm:ss.SSS'Z'", "2017-08-03T06:17:31.460Z", false, "2017-08-04T06:17:31.460Z", false); | |
* SearchQueryResult result = CouchbaseFullTextService.findByDateRange(bucket, INDEX_NAME, "createTime", "yyyy-MM-dd'T'hh:mm:ss.SSS'Z'", "2017-08-03T06:17:31.460Z", false, "2017-08-04T06:17:31.460Z", false); | |
* SearchQueryResult result = CouchbaseFullTextService.findByDateRange(bucket, INDEX_NAME, "attributes.activateTime", "yyyy-MM-dd'T'hh:mm:ss.SSS'Z'", "2017-08-03T06:17:31.460Z", false, "2017-08-04T06:17:31.460Z", false); | |
* </pre> | |
* | |
* @param bucket the bucket name | |
* @param searchIndex the FTS (Full-Text-Search) index name | |
* @param field the field name (ignore if null or empty) | |
* @param dateFormat the date format for parsing | |
* @param start the start date for search | |
* @param startInclusive whether to include start value in search results | |
* @param end the end date for search | |
* @param endInclusive whether to include end value in search results | |
* @return SearchQueryResult which contains document ids and hit locations (locations will be empty in this case) | |
*/ | |
public static SearchQueryResult findByDateRange(Bucket bucket, String searchIndex, String field, String dateFormat, String start, boolean startInclusive, String end, boolean endInclusive) { | |
Validate.isTrue(StringUtils.isNotEmpty(start) || StringUtils.isNotEmpty(end), "Both %s and %s values cannot be null or empty. At least one value must be provided.", "start", "end"); | |
return bucket.query(new SearchQuery(searchIndex, SearchQuery.dateRange().start(start, startInclusive).end(end, endInclusive).field(field).dateTimeParser(dateFormat))); | |
} | |
/*- | |
* <p>Find records by specified query string</p> | |
* | |
* <p>Examples:</p> | |
* <pre> | |
* SearchQueryResult result = CouchbaseFullTextService.findByQueryString(bucket, INDEX_NAME, "+janrainId:RegressionPatient -sample"); | |
* SearchQueryResult result = CouchbaseFullTextService.findByQueryString(bucket, INDEX_NAME, "+janrainId:RegressionPatient attr"); | |
* </pre> | |
* | |
* @param bucket the bucket name | |
* @param searchIndex the FTS (Full-Text-Search) index name | |
* @param queryString the user specified query string. Note that wildcards, regexp, and date range queries are not supported by this syntax | |
* @return SearchQueryResult which contains document ids and hit locations | |
*/ | |
public static SearchQueryResult findByQueryString(Bucket bucket, String searchIndex, String queryString) { | |
Validate.notEmpty(queryString, "Value of %s cannot be null or empty", "queryString"); | |
return bucket.query(new SearchQuery(searchIndex, SearchQuery.queryString(queryString))); | |
} | |
} | |
P.S. Click here to access my other posts.
Comments
Post a Comment