Building a Site Search Engine with Elasticsearch, Spring Boot, and IK Analyzer
This article demonstrates how to construct a full‑text site search solution by selecting Elasticsearch as the search engine, Spring Boot for the backend service, and the IK analyzer for Chinese tokenization, covering environment setup, project architecture, key code implementations, UI pages, and a concise conclusion on the effectiveness of the approach.
Abstract: As a company's data volume grows, quickly locating information becomes challenging; this guide shows how to build an internal search engine using Elasticsearch, Spring Boot, and the IK Chinese analyzer.
1. Technology Selection
Search engine service: Elasticsearch
Web service framework: Spring Boot
Chinese tokenization: IK Analyzer
2. Environment Preparation
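Install Elasticsearch (e.g., version 7.5.1) and, optionally, Kibana. Then download the IK plugin release that matches your Elasticsearch version exactly, unzip it into its own folder under the plugins directory of the Elasticsearch installation, and restart Elasticsearch so the plugin is loaded.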
3. Project Architecture
Data is indexed with the IK analyzer, stored in Elasticsearch, queried via the Elasticsearch Java high‑level client, and exposed through Spring Boot REST endpoints. The overall flow is:
Use IK to segment Chinese text.
Store the processed documents in an ES index.
Perform multi‑field search (author, content, title) using the ES client.
Return results through a Spring Boot controller and render them with Thymeleaf.
4. Implementation Effects
The prototype provides a simple search page similar to Baidu and a result page that lists matched articles with author, content snippet, and a link to the original URL.
5. Code Implementation
5.1 Entity Definition
package com.lbh.es.entity;
import com.fasterxml.jackson.annotation.JsonIgnore;
import javax.persistence.*;
/**
 * JPA entity for one row of the {@code es_article} table. ArticleService serializes
 * instances of this class to JSON and indexes them into Elasticsearch.
 *
 * <p>Equivalent REST request for the index that ArticleService#createIndexOfArticle
 * creates (index name {@code article}, one shard, one replica):
 * <pre>
 * PUT article
 * {
 * "mappings": {
 * "properties": {
 * "author": {"type": "text"},
 * "content": {"type": "text", "analyzer": "ik_max_word", "search_analyzer": "ik_smart"},
 * "title": {"type": "text", "analyzer": "ik_max_word", "search_analyzer": "ik_smart"},
 * "createDate": {"type": "date", "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd"},
 * "url": {"type": "text"}
 * }
 * },
 * "settings": {
 * "index": {"number_of_shards": 1, "number_of_replicas": 1}
 * }
 * }
 * </pre>
 */
@Entity
@Table(name = "es_article")
public class ArticleEntity {
// Database primary key, generated with the IDENTITY strategy.
// NOTE(review): @JsonIgnore is a Jackson annotation, but ArticleService serializes with
// Gson, which presumably ignores it -- confirm whether "id" ends up in the indexed document.
@Id
@JsonIgnore
@GeneratedValue(strategy = GenerationType.IDENTITY)
private long id;
// Author name; mapped as a plain "text" field in the index (no IK analyzer).
@Column(name = "author")
private String author;
// Article body; stored in a TEXT column and analyzed with IK in the index.
@Column(name = "content", columnDefinition = "TEXT")
private String content;
// Article title; analyzed with IK in the index.
@Column(name = "title")
private String title;
// Creation date kept as a string.
// NOTE(review): the index mapping only accepts "yyyy-MM-dd HH:mm:ss" or "yyyy-MM-dd" --
// verify that stored values already use one of these formats.
@Column(name = "createDate")
private String createDate;
// Link to the original article.
@Column(name = "url")
private String url;
// getters and setters omitted for brevity
}5.2 Elasticsearch Client Configuration
package com.lbh.es.config;
import org.apache.http.HttpHost;
import org.elasticsearch.client.*;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import java.util.*;
@Configuration
public class EsConfig {
@Value("${elasticsearch.schema}")
private String schema;
@Value("${elasticsearch.address}")
private String address;
@Value("${elasticsearch.connectTimeout}")
private int connectTimeout;
@Value("${elasticsearch.socketTimeout}")
private int socketTimeout;
@Value("${elasticsearch.connectionRequestTimeout}")
private int tryConnTimeout;
@Value("${elasticsearch.maxConnectNum}")
private int maxConnNum;
@Value("${elasticsearch.maxConnectPerRoute}")
private int maxConnectPerRoute;
/**
 * Creates the shared {@link RestHighLevelClient} bean from the {@code elasticsearch.*}
 * properties injected into this configuration class.
 *
 * <p>{@code address} is a comma-separated list of {@code host:port} entries. Each entry
 * becomes one {@link HttpHost} that uses the configured {@code schema}. Whitespace around
 * entries is ignored and empty entries (for example a trailing comma) are skipped.
 *
 * @return a client with the configured connect/socket/connection-request timeouts and
 *         connection-pool limits applied
 * @throws IllegalArgumentException if an entry has no {@code :port} part or the port is
 *         not a number
 */
@Bean
public RestHighLevelClient restHighLevelClient() {
    List<HttpHost> hosts = new ArrayList<>();
    for (String entry : address.split(",")) {
        String trimmed = entry.trim();
        if (trimmed.isEmpty()) {
            // Tolerate stray separators such as "a:9200,,b:9200".
            continue;
        }
        // Split once and reuse both halves instead of splitting the same string twice.
        String[] hostAndPort = trimmed.split(":");
        if (hostAndPort.length < 2) {
            throw new IllegalArgumentException(
                    "elasticsearch.address entries must look like host:port, got: '" + trimmed + "'");
        }
        String host = hostAndPort[0].trim();
        int port = Integer.parseInt(hostAndPort[1].trim());
        hosts.add(new HttpHost(host, port, schema));
    }

    RestClientBuilder builder = RestClient.builder(hosts.toArray(new HttpHost[0]));

    // Per-request timeouts.
    builder.setRequestConfigCallback(requestConfigBuilder -> {
        requestConfigBuilder.setConnectTimeout(connectTimeout);
        requestConfigBuilder.setSocketTimeout(socketTimeout);
        requestConfigBuilder.setConnectionRequestTimeout(tryConnTimeout);
        return requestConfigBuilder;
    });

    // Connection-pool limits of the underlying HTTP client.
    builder.setHttpClientConfigCallback(httpClientBuilder -> {
        httpClientBuilder.setMaxConnTotal(maxConnNum);
        httpClientBuilder.setMaxConnPerRoute(maxConnectPerRoute);
        return httpClientBuilder;
    });

    return new RestHighLevelClient(builder);
}
}
5.3 Service Layer (Core Operations)
package com.lbh.es.service;
import com.google.gson.Gson;
import com.lbh.es.entity.ArticleEntity;
import com.lbh.es.repository.ArticleRepository;
import org.elasticsearch.action.*;
import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest;
import org.elasticsearch.action.get.GetRequest;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.index.IndexResponse;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.support.master.AcknowledgedResponse;
import org.elasticsearch.client.*;
import org.elasticsearch.client.indices.CreateIndexRequest;
import org.elasticsearch.client.indices.CreateIndexResponse;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.springframework.stereotype.Service;
import javax.annotation.Resource;
import java.io.IOException;
import java.util.*;
@Service
public class ArticleService {
private static final String ARTICLE_INDEX = "article";
@Resource
private RestHighLevelClient client;
@Resource
private ArticleRepository articleRepository;
/**
 * Creates the {@code ARTICLE_INDEX} index with one shard, one replica and the article
 * field mapping (IK analyzers on {@code content} and {@code title}).
 *
 * @return {@code true} if Elasticsearch acknowledged the creation; {@code false} if it
 *         did not, or if the request failed with an {@link IOException}
 */
public boolean createIndexOfArticle() {
    String mappingJson = "{\"properties\":{\"author\":{\"type\":\"text\"},\"content\":{\"type\":\"text\",\"analyzer\":\"ik_max_word\",\"search_analyzer\":\"ik_smart\"},\"title\":{\"type\":\"text\",\"analyzer\":\"ik_max_word\",\"search_analyzer\":\"ik_smart\"},\"createDate\":{\"type\":\"date\",\"format\":\"yyyy-MM-dd HH:mm:ss||yyyy-MM-dd\"},\"url\":{\"type\":\"text\"}}}";

    CreateIndexRequest createRequest = new CreateIndexRequest(ARTICLE_INDEX);
    createRequest.settings(Settings.builder()
            .put("index.number_of_shards", 1)
            .put("index.number_of_replicas", 1)
            .build());
    createRequest.mapping(mappingJson, XContentType.JSON);

    boolean acknowledged;
    try {
        CreateIndexResponse created = client.indices().create(createRequest, RequestOptions.DEFAULT);
        acknowledged = created.isAcknowledged();
    } catch (IOException e) {
        // Transport failure: report it and signal "not created" to the caller.
        e.printStackTrace();
        acknowledged = false;
    }
    return acknowledged;
}
/**
 * Deletes the whole {@code ARTICLE_INDEX} index (the index itself, not a single document).
 *
 * @return {@code true} if Elasticsearch acknowledged the deletion; {@code false} if it
 *         did not, or if the request failed with an {@link IOException}
 */
public boolean deleteArticle() {
    boolean acknowledged;
    try {
        AcknowledgedResponse deleted =
                client.indices().delete(new DeleteIndexRequest(ARTICLE_INDEX), RequestOptions.DEFAULT);
        acknowledged = deleted.isAcknowledged();
    } catch (IOException e) {
        // Transport failure: report it and signal "not deleted" to the caller.
        e.printStackTrace();
        acknowledged = false;
    }
    return acknowledged;
}
/**
 * Serializes the article to JSON with Gson and indexes it into {@code ARTICLE_INDEX}.
 * No document id is set on the request.
 *
 * @param article the article to index
 * @return the Elasticsearch response, or {@code null} if the request failed with an
 *         {@link IOException}
 */
public IndexResponse addArticle(ArticleEntity article) {
    String document = new Gson().toJson(article);

    IndexRequest indexRequest = new IndexRequest(ARTICLE_INDEX);
    indexRequest.source(document, XContentType.JSON);

    IndexResponse indexed;
    try {
        indexed = client.index(indexRequest, RequestOptions.DEFAULT);
    } catch (IOException e) {
        // Transport failure: report it; callers see null instead of a response.
        e.printStackTrace();
        indexed = null;
    }
    return indexed;
}
/**
 * Indexes every article returned by {@code articleRepository.findAll()} by passing each
 * one to {@link #addArticle(ArticleEntity)}. The per-article responses are discarded, so
 * an article whose indexing failed is skipped silently.
 */
public void transferFromMysql() {
    for (ArticleEntity article : articleRepository.findAll()) {
        addArticle(article);
    }
}
/**
 * Runs a multi-field full-text search for {@code keyword} over the {@code author},
 * {@code content} and {@code title} fields of the {@code ARTICLE_INDEX} index.
 *
 * <p>Only {@code author}, {@code content}, {@code title} and {@code url} are copied from
 * each hit into the returned entities.
 *
 * @param keyword the text to search for; {@code null} or blank yields an empty result
 *                without contacting Elasticsearch
 * @return the matching articles, or an empty list if nothing matched or the request
 *         failed with an {@link IOException}
 */
public List<ArticleEntity> queryByKey(String keyword) {
    // multiMatchQuery rejects a null query value, so answer "no results" up front.
    if (keyword == null || keyword.trim().isEmpty()) {
        return Collections.emptyList();
    }

    SearchSourceBuilder source = new SearchSourceBuilder()
            .query(QueryBuilders.multiMatchQuery(keyword, "author", "content", "title"));
    // Restrict the search to the article index; a SearchRequest without indices
    // would search every index in the cluster.
    SearchRequest request = new SearchRequest(ARTICLE_INDEX).source(source);

    List<ArticleEntity> result = new ArrayList<>();
    try {
        SearchResponse response = client.search(request, RequestOptions.DEFAULT);
        for (SearchHit hit : response.getHits()) {
            Map<String, Object> fields = hit.getSourceAsMap();
            ArticleEntity item = new ArticleEntity();
            item.setAuthor((String) fields.get("author"));
            item.setContent((String) fields.get("content"));
            item.setTitle((String) fields.get("title"));
            item.setUrl((String) fields.get("url"));
            result.add(item);
        }
        return result;
    } catch (IOException e) {
        // Transport failure: report it and fall back to "no results".
        e.printStackTrace();
        return Collections.emptyList();
    }
}
/**
 * Fetches a single document from {@code ARTICLE_INDEX} by its Elasticsearch document id
 * and deserializes its source into an {@link ArticleEntity} with Gson.
 *
 * @param indexId the Elasticsearch document id
 * @return the article, or {@code null} if the document does not exist or the request
 *         failed with an {@link IOException}
 */
public ArticleEntity queryById(String indexId) {
    GetRequest request = new GetRequest(ARTICLE_INDEX, indexId);

    GetResponse response;
    try {
        response = client.get(request, RequestOptions.DEFAULT);
    } catch (IOException e) {
        // Transport failure: report it and treat the document as not found.
        e.printStackTrace();
        return null;
    }

    if (response == null || !response.isExists()) {
        return null;
    }
    return new Gson().fromJson(response.getSourceAsString(), ArticleEntity.class);
}
}
5.4 Controller (REST API)
package com.lbh.es.controller;
import com.lbh.es.entity.ArticleEntity;
import com.lbh.es.service.ArticleService;
import org.elasticsearch.action.index.IndexResponse;
import org.springframework.web.bind.annotation.*;
import javax.annotation.Resource;
import java.util.List;
@RestController
@RequestMapping("article")
public class ArticleController {
@Resource
private ArticleService articleService;
/**
 * {@code GET article/create}: asks the service to create the article index.
 *
 * @return whatever {@code ArticleService#createIndexOfArticle()} reports
 */
@GetMapping("/create")
public boolean create() {
    boolean created = articleService.createIndexOfArticle();
    return created;
}
/**
 * {@code GET article/delete}: asks the service to delete the article index.
 *
 * @return whatever {@code ArticleService#deleteArticle()} reports
 */
@GetMapping("/delete")
public boolean delete() {
    boolean deleted = articleService.deleteArticle();
    return deleted;
}
/**
 * {@code POST article/add}: indexes the article supplied in the request body.
 *
 * @param article the article deserialized from the request body
 * @return the response from {@code ArticleService#addArticle}, which may be {@code null}
 *         when indexing failed
 */
@PostMapping("/add")
public IndexResponse add(@RequestBody ArticleEntity article) {
    IndexResponse indexed = articleService.addArticle(article);
    return indexed;
}
/**
 * {@code GET article/transfer}: indexes every article held by the repository into
 * Elasticsearch via {@code ArticleService#transferFromMysql()}.
 *
 * <p>The route was originally published with a typo ({@code /fransfer}); that path is
 * kept as an alias so existing callers keep working.
 *
 * @return the literal string {@code "successful"} once the service call has returned
 */
@GetMapping({"/transfer", "/fransfer"})
public String transfer() {
    articleService.transferFromMysql();
    return "successful";
}
/**
 * {@code GET article/query?keyword=...}: full-text search over the indexed articles.
 *
 * @param keyword the search text, bound from the optional {@code keyword} request
 *                parameter ({@code null} when the parameter is absent)
 * @return the articles returned by {@code ArticleService#queryByKey}
 */
@GetMapping("/query")
public List<ArticleEntity> query(@RequestParam(value = "keyword", required = false) String keyword) {
    return articleService.queryByKey(keyword);
}
}
5.5 Thymeleaf Pages
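The search page (search.html) contains a simple form with an input box and a submit button, while the result page (result.html) iterates over the articles model attribute and renders each entry as a linked list item. Note that the ArticleController shown in 5.4 is a @RestController, so its endpoints return JSON rather than rendered views; serving these Thymeleaf pages requires a regular @Controller that places the search results into the model under the name articles and returns the view name.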
6. Conclusion
Elasticsearch offers efficient, scalable full‑text search for large datasets; combined with Spring Boot it enables rapid development of a site‑wide search service, and the IK analyzer makes Chinese text searchable with minimal effort.
Top Architect
Top Architect focuses on sharing practical architecture knowledge, covering enterprise, system, website, large‑scale distributed, and high‑availability architectures, plus architecture adjustments using internet technologies. We welcome idea‑driven, sharing‑oriented architects to exchange and learn together.
How this landed with the community
Was this worth your time?
0 Comments
Thoughtful readers leave field notes, pushback, and hard-won operational detail here.