Skip to content

Commit 5e91014

Browse files
author
John Plaisted
authored
feat(search) BREAKING Support ElasticSearch 7, drop ES5 (#2263)
Merges in changes from our ES7 branch and drops support for ES5. This is a breaking change due to the upgrade; we have an ES5 branch at the commit before this one.
1 parent 711e023 commit 5e91014

File tree

29 files changed

+455
-418
lines changed

29 files changed

+455
-418
lines changed

build.gradle

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,15 +40,15 @@ project.ext.externalDependency = [
4040
'commonsLang': 'commons-lang:commons-lang:2.6',
4141
'ebean': 'io.ebean:ebean:11.33.3',
4242
'ebeanAgent': 'io.ebean:ebean-agent:11.27.1',
43-
'elasticSearchRest': 'org.elasticsearch.client:elasticsearch-rest-high-level-client:5.6.8',
44-
'elasticSearchTransport': 'org.elasticsearch.client:transport:5.6.8',
43+
'elasticSearchRest': 'org.elasticsearch.client:elasticsearch-rest-high-level-client:7.9.3',
44+
'elasticSearchTransport': 'org.elasticsearch.client:transport:7.9.3',
4545
'findbugsAnnotations': 'com.google.code.findbugs:annotations:3.0.1',
4646
'gmaCoreModels': "com.linkedin.datahub-gma:core-models-data-template:$gmaVersion",
4747
'gmaDaoApi': "com.linkedin.datahub-gma:dao-api:$gmaVersion",
4848
'gmaDaoApiDataTemplate': "com.linkedin.datahub-gma:dao-api-data-template:$gmaVersion",
4949
'gmaEbeanDao': "com.linkedin.datahub-gma:ebean-dao:$gmaVersion",
50-
'gmaElasticsearchDao': "com.linkedin.datahub-gma:elasticsearch-dao:$gmaVersion",
51-
'gmaElasticsearchIntegTest': "com.linkedin.datahub-gma:elasticsearch-dao-integ-testing-docker:$gmaVersion",
50+
'gmaElasticsearchDao': "com.linkedin.datahub-gma:elasticsearch-dao-7:$gmaVersion",
51+
'gmaElasticsearchIntegTest': "com.linkedin.datahub-gma:elasticsearch-dao-integ-testing-docker-7:$gmaVersion",
5252
'gmaNeo4jDao': "com.linkedin.datahub-gma:neo4j-dao:$gmaVersion",
5353
'gmaRestliResources': "com.linkedin.datahub-gma:restli-resources:$gmaVersion",
5454
'gmaRestliResourcesDataTemplate': "com.linkedin.datahub-gma:restli-resources-data-template:$gmaVersion",
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
FROM python:3.8
2+
COPY . .
3+
RUN pip install --upgrade pip
4+
RUN pip install elasticsearch
5+
ENTRYPOINT ["python", "transfer.py"]

contrib/elasticsearch/es7-upgrade/transfer.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,12 @@
1414
parser = argparse.ArgumentParser(description="Transfers ES indexes between clusters.")
1515
parser.add_argument('-s', '--source', required=True, help='Source cluster URL and port.')
1616
parser.add_argument('-d', '--dest', required=True, help='Destination cluster URL and port.')
17-
parser.add_argument('--source-ssl', required=False, default=True, help='Enables / disables source SSL.')
18-
parser.add_argument('--dest-ssl', required=False, default=True, help='Enables / disables destination SSL.')
17+
parser.add_argument('--disable-source-ssl', required=False, action='store_true', help='If set, disable source SSL.')
18+
parser.add_argument('--disable-dest-ssl', required=False, action='store_true', help='If set, disable destination SSL.')
1919
parser.add_argument('--cert-file', required=False, default=None, help='Cert file to use with SSL.')
2020
parser.add_argument('--key-file', required=False, default=None, help='Key file to use with SSL.')
2121
parser.add_argument('--ca-file', required=False, default=None, help='Certificate authority file to use for SSL.')
22-
parser.add_argument('--create-only', required=False, default=False, help='If true, only create the index (with settings/mappings/aliases).')
22+
parser.add_argument('--create-only', required=False, action='store_true', help='If set, only create the index (with settings/mappings/aliases).')
2323
parser.add_argument('-i', '--indices', required=False, default="*", help='Regular expression for indexes to copy.')
2424
parser.add_argument('--name-override', required=False, default=None, help='destination index name override')
2525

@@ -207,9 +207,9 @@ def copy_index_data(clients, index, name_override):
207207

208208

209209
def main():
210-
ssl_context=create_ssl_context()
211-
source_ssl_context = ssl_context if args.source_ssl else None
212-
dest_ssl_context = ssl_context if args.dest_ssl else None
210+
ssl_context = create_ssl_context() if not args.disable_source_ssl or not args.disable_dest_ssl else None
211+
source_ssl_context = ssl_context if not args.disable_source_ssl else None
212+
dest_ssl_context = ssl_context if not args.disable_dest_ssl else None
213213
clients = EsClients(create_client(args.source, source_ssl_context), create_client(args.dest, dest_ssl_context))
214214
indices = get_index_settings(clients.source_client, args.indices)
215215

docker/docker-compose.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ services:
8686
- schema-registry
8787

8888
elasticsearch:
89-
image: elasticsearch:5.6.8
89+
image: elasticsearch:7.9.3
9090
env_file: elasticsearch/env/docker.env
9191
container_name: elasticsearch
9292
hostname: elasticsearch
@@ -96,7 +96,7 @@ services:
9696
- esdata:/usr/share/elasticsearch/data
9797

9898
kibana:
99-
image: kibana:5.6.8
99+
image: kibana:7.9.3
100100
env_file: kibana/env/docker.env
101101
container_name: kibana
102102
hostname: kibana

docker/elasticsearch-setup/create-indices.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,9 @@ function create_index {
77
jq -n \
88
--slurpfile settings index/$2 \
99
--slurpfile mappings index/$3 \
10-
'.settings=$settings[0] | .mappings.doc=$mappings[0]' > /tmp/data
10+
'.settings=$settings[0] | .mappings=$mappings[0]' > /tmp/data
1111

12-
curl -XPUT $ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/$1 --data @/tmp/data
12+
curl -XPUT $ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/$1 -H 'Content-Type: application/json' --data @/tmp/data
1313
}
1414

1515
create_index chartdocument chart/settings.json chart/mappings.json

docs-website/sidebars.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ module.exports = {
9494
],
9595
"Advanced Guides": [
9696
"docs/advanced/aspect-versioning",
97+
"docs/advanced/es-7-upgrade",
9798
"docs/advanced/high-cardinality",
9899
"docs/how/scsi-onboarding-guide",
99100
// WIP "docs/advanced/backfilling",

docs/advanced/es-7-upgrade.md

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Elasticsearch upgrade from 5.6.8 to 7.9.3
2+
3+
## Summary of changes
4+
Check out the list of breaking changes for [Elasticsearch 6](https://www.elastic.co/guide/en/elasticsearch/reference/6.8/breaking-changes-6.0.html) and [Elasticsearch 7](https://www.elastic.co/guide/en/elasticsearch/reference/7.x/breaking-changes-7.0.html). The following is a summary of the changes that impact Datahub.
5+
6+
### Search index mapping & settings
7+
- Removal of mapping types (as mentioned [here](https://www.elastic.co/guide/en/elasticsearch/reference/current/removal-of-types.html))
8+
- Specify the maximum allowed difference between `min_gram` and `max_gram` for NGramTokenizer and NGramTokenFilter by adding property `max_ngram_diff` in index settings, particularly if the difference is greater than 1 (as mentioned [here](https://www.elastic.co/guide/en/elasticsearch/reference/current/index-modules.html))
9+
10+
### Search query
11+
The following parameters are/were `optional` and hence automatically populated in the search query. Some tests that expect a certain search query to be sent to ES will change with the ES upgrade.
12+
- `disable_coord` parameter of the `bool` and `common_terms` queries has been removed (as mentioned [here](https://www.elastic.co/guide/en/elasticsearch/reference/6.8/breaking-changes-6.0.html))
13+
- `auto_generate_synonyms_phrase_query` parameter in `match` query is added with a default value of `true` (as mentioned [here](https://www.elastic.co/guide/en/elasticsearch/reference/7.x/query-dsl-match-query.html))
14+
15+
### Java High Level Rest Client
16+
- In 7.9.3, the Java High Level REST Client instance is constructed from a REST low-level client builder (`RestClientBuilder`). In 5.6.8, the same instance was constructed from an already-built REST low-level client (`RestClient`).
17+
- Document APIs such as the Index API, Delete API, etc. no longer take the doc `type` as an input.
18+
19+
## Migration strategy
20+
21+
As mentioned in the docs, indices created in Elasticsearch 5.x are not readable by Elasticsearch 7.x. Running the upgraded elasticsearch container on the existing esdata volume will fail.
22+
23+
For local development, our recommendation is to run the `docker/nuke.sh` script to remove the existing esdata volume before starting up the containers. Note, all data will be lost.
24+
25+
To migrate without losing data, please refer to the Python script and Dockerfile in `contrib/elasticsearch/es7-upgrade`. The script takes the source and destination Elasticsearch cluster URLs and SSL configuration (if applicable) as input. It ports the mappings and settings for all indices in the source cluster to the destination cluster, making the necessary changes stated above. Then it transfers all documents in the source cluster to the destination cluster.
26+
27+
You can run the script in a Docker container as follows:
28+
```
29+
docker build -t migrate-es-7 .
30+
docker run migrate-es-7 -s SOURCE -d DEST [--disable-source-ssl]
31+
[--disable-dest-ssl] [--cert-file CERT_FILE]
32+
[--key-file KEY_FILE] [--ca-file CA_FILE] [--create-only]
33+
[-i INDICES] [--name-override NAME_OVERRIDE]
34+
```
35+
36+
## Plan
37+
38+
We will create an "elasticsearch-5-legacy" branch with the version of master prior to the Elasticsearch 7 upgrade. However, we will not be supporting this branch moving forward, and all future development will be done using Elasticsearch 7.9.3.

gms/factories/src/main/java/com/linkedin/gms/factory/common/RestHighLevelClientFactory.java

Lines changed: 21 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -44,47 +44,41 @@ public class RestHighLevelClientFactory {
4444
@Bean(name = "elasticSearchRestHighLevelClient")
4545
@Nonnull
4646
protected RestHighLevelClient createInstance() {
47-
try {
48-
RestClient restClient;
49-
if (useSSL) {
50-
restClient = loadRestHttpsClient(host, port, threadCount, connectionRequestTimeout, sslContext);
51-
} else {
52-
restClient = loadRestHttpClient(host, port, threadCount, connectionRequestTimeout);
53-
}
54-
55-
return new RestHighLevelClient(restClient);
56-
} catch (Exception e) {
57-
throw new RuntimeException("Error: RestClient is not properly initialized. " + e.toString());
47+
RestClientBuilder restClientBuilder;
48+
49+
if (useSSL) {
50+
restClientBuilder = loadRestHttpsClient(host, port, threadCount, connectionRequestTimeout, sslContext);
51+
} else {
52+
restClientBuilder = loadRestHttpClient(host, port, threadCount, connectionRequestTimeout);
5853
}
54+
55+
return new RestHighLevelClient(restClientBuilder);
5956
}
6057

6158
@Nonnull
62-
private static RestClient loadRestHttpClient(@Nonnull String host, int port, int threadCount,
63-
int connectionRequestTimeout) {
59+
private static RestClientBuilder loadRestHttpClient(@Nonnull String host, int port, int threadCount,
60+
int connectionRequestTimeout) {
6461
RestClientBuilder builder = RestClient.builder(new HttpHost(host, port, "http"))
65-
.setHttpClientConfigCallback(httpAsyncClientBuilder ->
66-
httpAsyncClientBuilder.setDefaultIOReactorConfig(IOReactorConfig.custom()
67-
.setIoThreadCount(threadCount).build()));
62+
.setHttpClientConfigCallback(httpAsyncClientBuilder -> httpAsyncClientBuilder.setDefaultIOReactorConfig(
63+
IOReactorConfig.custom().setIoThreadCount(threadCount).build()));
6864

6965
builder.setRequestConfigCallback(requestConfigBuilder -> requestConfigBuilder.
70-
setConnectionRequestTimeout(connectionRequestTimeout));
66+
setConnectionRequestTimeout(connectionRequestTimeout));
7167

72-
return builder.build();
68+
return builder;
7369
}
7470

7571
@Nonnull
76-
private static RestClient loadRestHttpsClient(@Nonnull String host, int port, int threadCount,
77-
int connectionRequestTimeout, @Nonnull SSLContext sslContext) {
78-
72+
private static RestClientBuilder loadRestHttpsClient(@Nonnull String host, int port, int threadCount,
73+
int connectionRequestTimeout, @Nonnull SSLContext sslContext) {
7974
final RestClientBuilder builder = RestClient.builder(new HttpHost(host, port, "https"))
80-
.setHttpClientConfigCallback(httpAsyncClientBuilder -> httpAsyncClientBuilder.setSSLContext(sslContext)
81-
.setSSLHostnameVerifier(new NoopHostnameVerifier())
82-
.setDefaultIOReactorConfig(IOReactorConfig.custom().setIoThreadCount(threadCount).build()));
75+
.setHttpClientConfigCallback(httpAsyncClientBuilder -> httpAsyncClientBuilder.setSSLContext(sslContext)
76+
.setSSLHostnameVerifier(new NoopHostnameVerifier())
77+
.setDefaultIOReactorConfig(IOReactorConfig.custom().setIoThreadCount(threadCount).build()));
8378

8479
builder.setRequestConfigCallback(requestConfigBuilder -> requestConfigBuilder.
85-
setConnectionRequestTimeout(connectionRequestTimeout));
80+
setConnectionRequestTimeout(connectionRequestTimeout));
8681

87-
return builder.build();
82+
return builder;
8883
}
8984
}
90-

gms/impl/src/main/resources/index/chart/settings.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
{
22
"index": {
3+
"max_ngram_diff": 17,
34
"analysis": {
45
"filter": {
56
"autocomplete_filter": {

gms/impl/src/main/resources/index/corp-user/settings.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
{
22
"index": {
3+
"max_ngram_diff": 17,
34
"analysis": {
45
"filter": {
56
"autocomplete_filter": {

0 commit comments

Comments
 (0)