操作环境

下面的程序和运行环境都是使用Laradock部署的

  • Elasticsearch 7.6.1
  • Kibana 7.6.1
  • PHP 7.2
  • elasticsearch/elasticsearch ~7.6.0

示例

导入数据

所有的DSL语句都在Kibana的Dev Tools工具中执行.

# 导入的数据取材自参考资料二
# 书籍文档信息的集合,有以下字段:title(标题)、authors(作者)、summary(摘要)、publish_date(发布日期)、num_reviews(评论数)和 publisher(出版社)
POST /bookdb_index/_bulk
{"index":{"_id":1}}
{"title":"Elasticsearch: The Definitive Guide","authors":["clinton gormley","zachary tong"],"summary":"A distibuted real-time search and analytics engine","publish_date":"2015-02-07","num_reviews":20,"publisher":"oreilly"}
{"index":{"_id":2}}
{"title":"Taming Text: How to Find, Organize, and Manipulate It","authors":["grant ingersoll","thomas morton","drew farris"],"summary":"organize text using approaches such as full-text search, proper name recognition, clustering, tagging, information extraction, and summarization","publish_date":"2013-01-24","num_reviews":12,"publisher":"manning"}
{"index":{"_id":3}}
{"title":"Elasticsearch in Action","authors":["radu gheorge","matthew lee hinman","roy russo"],"summary":"build scalable search applications using Elasticsearch without having to do complex low-level programming or understand advanced data science algorithms","publish_date":"2015-12-03","num_reviews":18,"publisher":"manning"}
{"index":{"_id":4}}
{"title":"Solr in Action","authors":["trey grainger","timothy potter"],"summary":"Comprehensive guide to implementing a scalable search engine using Apache Solr","publish_date":"2014-04-05","num_reviews":23,"publisher":"manning"}

DSL

代码

# 下面这些筛选条件完全是为了举例写法才写这么多的...
GET /bookdb_index/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "multi_match": {
            "query": "elasticsearch guide",
            "fields": []
          }
        },
        {
          "match_phrase_prefix": {
            "title": {
              "query": "elastic",
              "slop": 3,
              "max_expansions": 10
            }
          }
        },
        {
          "wildcard": {
            "title": "elastic*"
          }
        },
        {
          "regexp": {
            "title": "elastic.*ch"
          }
        },
        {
          "range": {
            "num_reviews": {
              "from": 10,
              "to": 20,
              "include_lower": true,
              "include_upper": true
            }
          }
        }
      ],
      "must_not": [
        {
          "terms": {
            "publish_date": [
              "2015-12-04"
            ]
          }
        }
      ],
      "should": [
        {
          "match": {
            "publisher": "manning"
          }
        }
      ],
      "filter": [
        {
          "exists": {
            "field": "authors"
          }
        },
        {
          "range": {
            "num_reviews": {
              "gte": 12,
              "lt": 30
            }
          }
        }
      ]
    }
  },
  "highlight": {
    "fields": {
      "title": {},
      "summary": {}
    }
  },
  "aggs": {
    "sum_of_reviews": {
      "sum": {
        "field": "num_reviews"
      }
    },
    "group_of_date": {
      "terms": {
        "field": "publish_date",
        "size": 10
      },
      "aggs": {
        "avg_of_reviews": {
          "avg": {
            "field": "num_reviews"
          }
        }
      }
    }
  }
}

结果

{
  "took" : 17,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 2,
      "relation" : "eq"
    },
    "max_score" : 5.7691345,
    "hits" : [
      {
        "_index" : "bookdb_index",
        "_type" : "book",
        "_id" : "1",
        "_score" : 5.7691345,
        "_source" : {
          "title" : "Elasticsearch: The Definitive Guide",
          "authors" : [
            "clinton gormley",
            "zachary tong"
          ],
          "summary" : "A distibuted real-time search and analytics engine",
          "publish_date" : "2015-02-07",
          "num_reviews" : 20,
          "publisher" : "oreilly"
        },
        "highlight" : {
          "title" : [
            "<em>Elasticsearch</em>: The Definitive <em>Guide</em>"
          ]
        }
      },
      {
        "_index" : "bookdb_index",
        "_type" : "book",
        "_id" : "3",
        "_score" : 5.2062206,
        "_source" : {
          "title" : "Elasticsearch in Action",
          "authors" : [
            "radu gheorge",
            "matthew lee hinman",
            "roy russo"
          ],
          "summary" : "build scalable search applications using Elasticsearch without having to do complex low-level programming or understand advanced data science algorithms",
          "publish_date" : "2015-12-03",
          "num_reviews" : 18,
          "publisher" : "manning"
        },
        "highlight" : {
          "summary" : [
            "build scalable search applications using <em>Elasticsearch</em> without having to do complex low-level programming"
          ],
          "title" : [
            "<em>Elasticsearch</em> in Action"
          ]
        }
      }
    ]
  },
  "aggregations" : {
    "sum_of_reviews" : {
      "value" : 38.0
    },
    "group_of_date" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : 1423267200000,
          "key_as_string" : "2015-02-07T00:00:00.000Z",
          "doc_count" : 1,
          "avg_of_reviews" : {
            "value" : 20.0
          }
        },
        {
          "key" : 1449100800000,
          "key_as_string" : "2015-12-03T00:00:00.000Z",
          "doc_count" : 1,
          "avg_of_reviews" : {
            "value" : 18.0
          }
        }
      ]
    }
  }
}

PHP

代码

// Example: run a compound `bool` search with highlighting and aggregations
// against `bookdb_index` and print the raw response.
// Requires the elasticsearch/elasticsearch Composer package to be installed;
// its version must match the Elasticsearch server version in use.
use Elasticsearch\ClientBuilder;

$client = ClientBuilder::fromConfig([
    // Host name and port of the Elasticsearch node
    'hosts' => ['elasticsearch:9200'],
    'retries' => 2,
    'handler' => ClientBuilder::singleHandler()
]);

$search = [
    // Index to search
    'index' => 'bookdb_index',
    // Request body (the query DSL expressed as a PHP array)
    'body' => [
        'query' => [
            'bool' => [
                // Every clause in `must` has to match
                'must' => [
                    // Multi-field full-text clause
                    [
                        'multi_match' => [
                            'query' => 'elasticsearch guide',
                            // An empty list does not restrict the search to specific fields
                            'fields' => []
                            // Alternatively, name the fields to search explicitly:
                            // 'fields' => ['title', 'summary']
                        ]
                    ],
                    // Phrase-prefix clause
                    [
                        'match_phrase_prefix' => [
                            // Match documents whose `title` contains a phrase starting with `elastic`
                            'title' => [
                                'query' => 'elastic',
                                // Tolerance for reordered / loosely positioned terms: roughly how many
                                // other words may sit between the terms of the query
                                'slop' => 3,
                                // Caps how many terms the trailing prefix may expand to, limiting resource usage
                                'max_expansions' => 10
                            ]
                        ]
                    ],
                    // Wildcard clause
                    [
                        'wildcard' => [
                            // `title` has a term starting with `elastic`
                            'title' => 'elastic*'
                        ]
                    ],
                    // Regular-expression clause
                    [
                        'regexp' => [
                            'title' => 'elastic.*ch'
                        ]
                    ],
                    // Range clause
                    [
                        'range' => [
                            // 10 <= `num_reviews` <= 20
                            'num_reviews' => [
                                'from' => 10,
                                'to' => 20,
                                // Both bounds are inclusive
                                'include_lower' => true,
                                'include_upper' => true
                            ]
                        ]
                    ]
                ],
                // Documents matching any `must_not` clause are excluded
                'must_not' => [
                    [
                        'terms' => [
                            'publish_date' => ['2015-12-04']
                        ]
                    ]
                ],
                // Optional clauses: because `must`/`filter` are present, a document does not
                // need to match `should`; documents that do match are scored higher
                'should' => [
                    [
                        'match' => [
                            'publisher' => 'manning'
                        ]
                    ]
                ],
                // Additional mandatory conditions that narrow the result without affecting the score
                'filter' => [
                    // The `authors` field must exist
                    [
                        'exists' => [
                            'field' => 'authors'
                        ]
                    ],
                    // The other way of writing a range clause: 12 <= `num_reviews` < 30
                    [
                        'range' => [
                            'num_reviews' => [
                                'gte' => 12,
                                'lt' => 30
                            ]
                        ]
                    ]
                ]
            ]
        ],
        // Return matched fragments wrapped in highlight markup
        'highlight' => [
            'fields' => [
                // Keys name the fields to highlight. The value must serialise to an empty
                // JSON object `{}`, hence stdClass (an empty PHP array would encode as `[]`)
                'title' => new stdClass(),
                'summary' => new stdClass()
            ]
        ],
        // Aggregations
        'aggs' => [
            // Sum of `num_reviews` over all matching documents, reported under the
            // caller-chosen name `sum_of_reviews`
            'sum_of_reviews' => [
                'sum' => [
                    'field' => 'num_reviews',
                ]
            ],
            // Grouping
            'group_of_date' => [
                // Bucket by the `publish_date` field (at most 10 buckets)
                'terms' => [
                    'field' => 'publish_date',
                    'size' => 10
                ],
                // Average `num_reviews` within each bucket
                'aggs' => [
                    'avg_of_reviews' => [
                        'avg' => [
                            'field' => 'num_reviews'
                        ]
                    ]
                ]
            ]
        ]
    ]
];

// var_dump() writes straight to the output, so capture it and HTML-escape it before
// printing; otherwise the browser interprets the <em>...</em> highlight markers in the
// response instead of displaying them.
ob_start();
var_dump($client->search($search));
$dump = ob_get_clean();

echo '<pre>', htmlspecialchars($dump, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8'), '</pre>';
        

输出

array(5) {
  ["took"]=>
  int(18)
  ["timed_out"]=>
  bool(false)
  ["_shards"]=>
  array(4) {
    ["total"]=>
    int(1)
    ["successful"]=>
    int(1)
    ["skipped"]=>
    int(0)
    ["failed"]=>
    int(0)
  }
  ["hits"]=>
  array(3) {
    ["total"]=>
    array(2) {
      ["value"]=>
      int(2)
      ["relation"]=>
      string(2) "eq"
    }
    ["max_score"]=>
    float(5.7691345)
    ["hits"]=>
    array(2) {
      [0]=>
      array(6) {
        ["_index"]=>
        string(12) "bookdb_index"
        ["_type"]=>
        string(4) "book"
        ["_id"]=>
        string(1) "1"
        ["_score"]=>
        float(5.7691345)
        ["_source"]=>
        array(6) {
          ["title"]=>
          string(35) "Elasticsearch: The Definitive Guide"
          ["authors"]=>
          array(2) {
            [0]=>
            string(15) "clinton gormley"
            [1]=>
            string(12) "zachary tong"
          }
          ["summary"]=>
          string(50) "A distibuted real-time search and analytics engine"
          ["publish_date"]=>
          string(10) "2015-02-07"
          ["num_reviews"]=>
          int(20)
          ["publisher"]=>
          string(7) "oreilly"
        }
        ["highlight"]=>
        array(1) {
          ["title"]=>
          array(1) {
            [0]=>
            string(53) "<em>Elasticsearch</em>: The Definitive <em>Guide</em>"
          }
        }
      }
      [1]=>
      array(6) {
        ["_index"]=>
        string(12) "bookdb_index"
        ["_type"]=>
        string(4) "book"
        ["_id"]=>
        string(1) "3"
        ["_score"]=>
        float(5.2062206)
        ["_source"]=>
        array(6) {
          ["title"]=>
          string(23) "Elasticsearch in Action"
          ["authors"]=>
          array(3) {
            [0]=>
            string(12) "radu gheorge"
            [1]=>
            string(18) "matthew lee hinman"
            [2]=>
            string(9) "roy russo"
          }
          ["summary"]=>
          string(152) "build scalable search applications using Elasticsearch without having to do complex low-level programming or understand advanced data science algorithms"
          ["publish_date"]=>
          string(10) "2015-12-03"
          ["num_reviews"]=>
          int(18)
          ["publisher"]=>
          string(7) "manning"
        }
        ["highlight"]=>
        array(2) {
          ["summary"]=>
          array(1) {
            [0]=>
            string(114) "build scalable search applications using <em>Elasticsearch</em> without having to do complex low-level programming"
          }
          ["title"]=>
          array(1) {
            [0]=>
            string(32) "<em>Elasticsearch</em> in Action"
          }
        }
      }
    }
  }
  ["aggregations"]=>
  array(2) {
    ["sum_of_reviews"]=>
    array(1) {
      ["value"]=>
      float(38)
    }
    ["group_of_date"]=>
    array(3) {
      ["doc_count_error_upper_bound"]=>
      int(0)
      ["sum_other_doc_count"]=>
      int(0)
      ["buckets"]=>
      array(2) {
        [0]=>
        array(4) {
          ["key"]=>
          int(1423267200000)
          ["key_as_string"]=>
          string(24) "2015-02-07T00:00:00.000Z"
          ["doc_count"]=>
          int(1)
          ["avg_of_reviews"]=>
          array(1) {
            ["value"]=>
            float(20)
          }
        }
        [1]=>
        array(4) {
          ["key"]=>
          int(1449100800000)
          ["key_as_string"]=>
          string(24) "2015-12-03T00:00:00.000Z"
          ["doc_count"]=>
          int(1)
          ["avg_of_reviews"]=>
          array(1) {
            ["value"]=>
            float(18)
          }
        }
      }
    }
  }
}

参考资料

下面的资料使用的Elasticsearch版本较低,有部分功能在当前7.x版本已被废弃,需要有选择地参考和使用


Jeffid
214 声望10 粉丝

新世界的开发者;