Big Data 18 min read

Building a Site Search Engine with Elasticsearch, Spring Boot, and IK Analyzer

This article demonstrates how to construct a full‑text site search solution by selecting Elasticsearch as the search engine, Spring Boot for the backend service, and the IK analyzer for Chinese tokenization, covering environment setup, project architecture, key code implementations, UI pages, and a concise conclusion on the effectiveness of the approach.

Top Architect
Top Architect
Top Architect
Building a Site Search Engine with Elasticsearch, Spring Boot, and IK Analyzer

Abstract: As a company's data volume grows, quickly locating information becomes increasingly difficult; this guide shows how to build an internal search engine using Elasticsearch, Spring Boot, and the IK Chinese analyzer.

1. Technology Selection

Search engine service: Elasticsearch

Web service framework: Spring Boot

Chinese tokenization: IK Analyzer

2. Environment Preparation

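Install Elasticsearch (e.g., version 7.5.1) and optionally Kibana. Then download the IK plugin release whose version matches your Elasticsearch version exactly, unzip it into its own subdirectory (for example plugins/ik) under the plugins directory of the Elasticsearch installation, and restart Elasticsearch so the plugin is loaded.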

3. Project Architecture

Data is indexed with the IK analyzer, stored in Elasticsearch, queried via the Elasticsearch Java high‑level client, and exposed through Spring Boot REST endpoints. The overall flow is:

Use IK to segment Chinese text.

Store the processed documents in an ES index.

Perform multi‑field search (author, content, title) using the ES client.

Return results through a Spring Boot controller and render them with Thymeleaf.

4. Implementation Effects

The prototype provides a simple search page similar to Baidu and a result page that lists matched articles with author, content snippet, and a link to the original URL.

5. Code Implementation

5.1 Entity Definition

package com.lbh.es.entity;

import com.fasterxml.jackson.annotation.JsonIgnore;
import javax.persistence.*;

/**
 * JPA entity for one article row in the {@code es_article} table; the same
 * object is also the payload that is serialized to JSON and indexed in
 * Elasticsearch.
 *
 * <p>Equivalent Kibana/REST request for the index that
 * {@code ArticleService#createIndexOfArticle()} builds (index name
 * {@code article}, one shard, one replica):
 *
 * <pre>
 * PUT article
 * {
 *   "mappings": {
 *     "properties": {
 *       "author": {"type": "text"},
 *       "content": {"type": "text", "analyzer": "ik_max_word", "search_analyzer": "ik_smart"},
 *       "title": {"type": "text", "analyzer": "ik_max_word", "search_analyzer": "ik_smart"},
 *       "createDate": {"type": "date", "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd"},
 *       "url": {"type": "text"}
 *     }
 *   },
 *   "settings": {
 *     "index": {"number_of_shards": 1, "number_of_replicas": 1}
 *   }
 * }
 * </pre>
 *
 * <p>{@code content} and {@code title} are indexed with {@code ik_max_word}
 * and searched with {@code ik_smart}; {@code author} and {@code url} use the
 * default text analyzer.
 */
@Entity
@Table(name = "es_article")
public class ArticleEntity {
    // Database-generated primary key. @JsonIgnore is a Jackson annotation.
    // NOTE(review): the service serializes with Gson, which presumably does not
    // honor Jackson annotations, so "id" likely ends up in the indexed
    // document as well — confirm whether that is intended.
    @Id
    @JsonIgnore
    @GeneratedValue(strategy = GenerationType.IDENTITY)
    private long id;
    // Author name; one of the three fields searched by keyword.
    @Column(name = "author")
    private String author;
    // Full article body, stored in a TEXT column.
    @Column(name = "content", columnDefinition = "TEXT")
    private String content;
    // Article title.
    @Column(name = "title")
    private String title;
    // Creation timestamp kept as a string. The index mapping declares this
    // field as a date with format "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd", so the
    // value is expected to follow one of those two patterns.
    @Column(name = "createDate")
    private String createDate;
    // Link to the original article.
    @Column(name = "url")
    private String url;
    // getters and setters omitted for brevity
}

5.2 Elasticsearch Client Configuration

package com.lbh.es.config;

import org.apache.http.HttpHost;
import org.elasticsearch.client.*;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import java.util.*;

/**
 * Spring configuration that builds the application's Elasticsearch
 * {@link RestHighLevelClient} from {@code elasticsearch.*} properties.
 */
@Configuration
public class EsConfig {
    /** URI scheme applied to every node (the property key is spelled "schema"). */
    @Value("${elasticsearch.schema}")
    private String schema;
    /** Comma-separated list of {@code host:port} entries, one per node. */
    @Value("${elasticsearch.address}")
    private String address;
    /** Value handed to {@code setConnectTimeout} (milliseconds per Apache HttpClient). */
    @Value("${elasticsearch.connectTimeout}")
    private int connectTimeout;
    /** Value handed to {@code setSocketTimeout} (milliseconds per Apache HttpClient). */
    @Value("${elasticsearch.socketTimeout}")
    private int socketTimeout;
    /** Value handed to {@code setConnectionRequestTimeout} (milliseconds per Apache HttpClient). */
    @Value("${elasticsearch.connectionRequestTimeout}")
    private int tryConnTimeout;
    /** Upper bound on pooled connections across all routes. */
    @Value("${elasticsearch.maxConnectNum}")
    private int maxConnNum;
    /** Upper bound on pooled connections per route. */
    @Value("${elasticsearch.maxConnectPerRoute}")
    private int maxConnectPerRoute;

    /**
     * Creates the client bean from the configured node list, timeouts and
     * connection-pool limits.
     *
     * @return a client connected to every node listed in {@code elasticsearch.address}
     * @throws IllegalArgumentException if the address list is empty, an entry is
     *         not of the form {@code host:port}, or a port is not a number
     */
    @Bean
    public RestHighLevelClient restHighLevelClient() {
        List<HttpHost> hosts = new ArrayList<>();
        for (String entry : address.split(",")) {
            String hostPort = entry.trim();
            if (hostPort.isEmpty()) {
                // Tolerate stray or trailing commas in the property value.
                continue;
            }
            // Split once and validate, instead of indexing blindly into the parts.
            String[] parts = hostPort.split(":");
            if (parts.length != 2 || parts[0].isEmpty()) {
                throw new IllegalArgumentException(
                        "elasticsearch.address entry must be host:port but was '" + hostPort + "'");
            }
            hosts.add(new HttpHost(parts[0], Integer.parseInt(parts[1].trim()), schema));
        }
        if (hosts.isEmpty()) {
            throw new IllegalArgumentException("elasticsearch.address must list at least one host:port");
        }

        RestClientBuilder builder = RestClient.builder(hosts.toArray(new HttpHost[0]));
        builder.setRequestConfigCallback(requestConfigBuilder -> {
            requestConfigBuilder.setConnectTimeout(connectTimeout);
            requestConfigBuilder.setSocketTimeout(socketTimeout);
            requestConfigBuilder.setConnectionRequestTimeout(tryConnTimeout);
            return requestConfigBuilder;
        });
        builder.setHttpClientConfigCallback(httpClientBuilder -> {
            httpClientBuilder.setMaxConnTotal(maxConnNum);
            httpClientBuilder.setMaxConnPerRoute(maxConnectPerRoute);
            return httpClientBuilder;
        });
        return new RestHighLevelClient(builder);
    }
}

5.3 Service Layer (Core Operations)

package com.lbh.es.service;

import com.google.gson.Gson;
import com.lbh.es.entity.ArticleEntity;
import com.lbh.es.repository.ArticleRepository;
import org.elasticsearch.action.*;
import org.elasticsearch.client.*;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.springframework.stereotype.Service;
import javax.annotation.Resource;
import java.io.IOException;
import java.util.*;

/**
 * Core Elasticsearch operations for articles: index lifecycle, document
 * indexing, bulk import from the relational store, and lookup by keyword or
 * document id.
 *
 * <p>I/O failures are printed to stderr and reported to the caller as
 * {@code false}, {@code null} or an empty list rather than thrown.
 */
@Service
public class ArticleService {
    /** Name of the Elasticsearch index that every operation in this class targets. */
    private static final String ARTICLE_INDEX = "article";
    /** Shared serializer; Gson instances are reusable, so one is enough. */
    private static final Gson GSON = new Gson();
    @Resource
    private RestHighLevelClient client;
    @Resource
    private ArticleRepository articleRepository;

    /**
     * Creates the article index with one shard, one replica and the IK-based
     * field mapping.
     *
     * @return {@code true} if the cluster acknowledged the creation,
     *         {@code false} if it did not or an I/O error occurred
     */
    public boolean createIndexOfArticle() {
        Settings settings = Settings.builder()
                .put("index.number_of_shards", 1)
                .put("index.number_of_replicas", 1)
                .build();
        String mapping = "{\"properties\":{\"author\":{\"type\":\"text\"},\"content\":{\"type\":\"text\",\"analyzer\":\"ik_max_word\",\"search_analyzer\":\"ik_smart\"},\"title\":{\"type\":\"text\",\"analyzer\":\"ik_max_word\",\"search_analyzer\":\"ik_smart\"},\"createDate\":{\"type\":\"date\",\"format\":\"yyyy-MM-dd HH:mm:ss||yyyy-MM-dd\"},\"url\":{\"type\":\"text\"}}}";
        CreateIndexRequest indexRequest = new CreateIndexRequest(ARTICLE_INDEX)
                .settings(settings)
                .mapping(mapping, XContentType.JSON);
        try {
            CreateIndexResponse response = client.indices().create(indexRequest, RequestOptions.DEFAULT);
            return response.isAcknowledged();
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Deletes the whole article index (not a single article).
     *
     * @return {@code true} if the cluster acknowledged the deletion,
     *         {@code false} if it did not or an I/O error occurred
     */
    public boolean deleteArticle() {
        DeleteIndexRequest request = new DeleteIndexRequest(ARTICLE_INDEX);
        try {
            AcknowledgedResponse response = client.indices().delete(request, RequestOptions.DEFAULT);
            return response.isAcknowledged();
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Serializes the article to JSON and indexes it as a new document.
     *
     * <p>No document id is set on the request, so indexing the same article
     * twice produces two documents.
     *
     * @param article the article to index
     * @return the index response, or {@code null} if an I/O error occurred
     */
    public IndexResponse addArticle(ArticleEntity article) {
        String json = GSON.toJson(article);
        IndexRequest indexRequest = new IndexRequest(ARTICLE_INDEX).source(json, XContentType.JSON);
        try {
            return client.index(indexRequest, RequestOptions.DEFAULT);
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Indexes every article returned by the repository, one request per
     * article. Individual failures are not reported to the caller.
     */
    public void transferFromMysql() {
        articleRepository.findAll().forEach(this::addArticle);
    }

    /**
     * Searches the article index for the keyword across the author, content
     * and title fields.
     *
     * @param keyword the text to search for; {@code null} or blank yields no results
     * @return the matching articles (author, content, title, createDate and url
     *         populated), or an empty list if nothing matched or an I/O error occurred
     */
    public List<ArticleEntity> queryByKey(String keyword) {
        if (keyword == null || keyword.trim().isEmpty()) {
            // A multi-match query needs a value; answer "nothing found" instead of failing.
            return Collections.emptyList();
        }
        // Scope the search to the article index; a request without indices
        // would be executed against every index in the cluster.
        SearchRequest request = new SearchRequest(ARTICLE_INDEX);
        SearchSourceBuilder builder = new SearchSourceBuilder();
        builder.query(QueryBuilders.multiMatchQuery(keyword, "author", "content", "title"));
        request.source(builder);
        List<ArticleEntity> result = new ArrayList<>();
        try {
            SearchResponse response = client.search(request, RequestOptions.DEFAULT);
            for (SearchHit hit : response.getHits()) {
                Map<String, Object> source = hit.getSourceAsMap();
                ArticleEntity item = new ArticleEntity();
                item.setAuthor((String) source.get("author"));
                item.setContent((String) source.get("content"));
                item.setTitle((String) source.get("title"));
                // Keep keyword results consistent with queryById, which returns this field too.
                item.setCreateDate((String) source.get("createDate"));
                item.setUrl((String) source.get("url"));
                result.add(item);
            }
            return result;
        } catch (IOException e) {
            e.printStackTrace();
            return Collections.emptyList();
        }
    }

    /**
     * Fetches a single article by its Elasticsearch document id.
     *
     * @param indexId the document id; {@code null} or empty yields {@code null}
     * @return the article, or {@code null} if it does not exist or an I/O error occurred
     */
    public ArticleEntity queryById(String indexId) {
        if (indexId == null || indexId.isEmpty()) {
            return null;
        }
        GetRequest request = new GetRequest(ARTICLE_INDEX, indexId);
        try {
            GetResponse response = client.get(request, RequestOptions.DEFAULT);
            if (response != null && response.isExists()) {
                return GSON.fromJson(response.getSourceAsString(), ArticleEntity.class);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return null;
    }
}

5.4 Controller (REST API)

package com.lbh.es.controller;

import com.lbh.es.entity.ArticleEntity;
import com.lbh.es.service.ArticleService;
import org.elasticsearch.action.index.IndexResponse;
import org.springframework.web.bind.annotation.*;
import javax.annotation.Resource;
import java.util.List;

/**
 * REST endpoints under {@code /article} that expose the article index
 * operations of {@link ArticleService}.
 *
 * <p>NOTE(review): index creation, deletion and the bulk transfer change
 * state but are mapped to GET. The verbs are left as they are so existing
 * callers keep working.
 */
@RestController
@RequestMapping("article")
public class ArticleController {
    @Resource
    private ArticleService articleService;

    /**
     * Creates the article index.
     *
     * @return whatever {@link ArticleService#createIndexOfArticle()} reports
     */
    @GetMapping("/create")
    public boolean create() {
        return articleService.createIndexOfArticle();
    }

    /**
     * Deletes the article index.
     *
     * @return whatever {@link ArticleService#deleteArticle()} reports
     */
    @GetMapping("/delete")
    public boolean delete() {
        return articleService.deleteArticle();
    }

    /**
     * Indexes the article supplied in the request body.
     *
     * @param article the article to index
     * @return the response returned by {@link ArticleService#addArticle(ArticleEntity)}
     */
    @PostMapping("/add")
    public IndexResponse add(@RequestBody ArticleEntity article) {
        return articleService.addArticle(article);
    }

    /**
     * Copies all articles from the relational store into the index.
     *
     * <p>{@code /transfer} is the correctly spelled route; the original
     * misspelled {@code /fransfer} stays mapped so existing callers do not break.
     *
     * @return the literal string {@code "successful"} once the transfer call returns
     */
    @GetMapping({"/transfer", "/fransfer"})
    public String transfer() {
        articleService.transferFromMysql();
        return "successful";
    }

    /**
     * Searches articles by keyword.
     *
     * @param keyword the search text, bound from the {@code keyword} request parameter
     * @return the list returned by {@link ArticleService#queryByKey(String)}
     */
    @GetMapping("/query")
    public List<ArticleEntity> query(String keyword) {
        return articleService.queryByKey(keyword);
    }
}

5.5 Thymeleaf Pages

The search page ( search.html ) contains a simple form with an input box and a submit button, while the result page ( result.html ) iterates over the articles model attribute and renders each entry as a linked list item.

6. Conclusion

Elasticsearch offers efficient, scalable full‑text search for large datasets; combined with Spring Boot it enables rapid development of a site‑wide search service, and the IK analyzer makes Chinese text searchable with minimal effort.

Java · Backend Development · Elasticsearch · Spring Boot · Full-Text Search · IK Analyzer
Top Architect
Written by

Top Architect

Top Architect focuses on sharing practical architecture knowledge, covering enterprise, system, website, large‑scale distributed, and high‑availability architectures, plus architecture adjustments using internet technologies. We welcome idea‑driven, sharing‑oriented architects to exchange and learn together.

0 followers
Reader feedback

How this landed with the community

login Sign in to like

Rate this article

Was this worth your time?

Sign in to rate
Discussion

0 Comments

Thoughtful readers leave field notes, pushback, and hard-won operational detail here.