Skip to content
Advertisement

Distinct value in elastic search with extra field inside {“key”: “xyz”, “doc_count”: 1}

I am working on Elastic Search (version 7.16) with Oython (version 3.6)

I have the below rows in Elastic Search:

{"owner": "john", "database": "postgres", "table": "sales_tab"},
{"owner": "hannah", "database": "mongodb", "table": "dept_tab"},
{"owner": "peter", "database": "mysql", "table": "new_tab"},
{"owner": "jim", "database": "postgres", "table": "cust_tab"},
{"owner": "lima", "database": "postgres", "table": "sales_tab"},
{"owner": "tory", "database": "oracle", "table": "store_tab"},
{"owner": "kane", "database": "mysql", "table": "trasit_tab"},
{"owner": "roma", "database": "mongodb", "table": "common_tab"},
{"owner": "ashley", "database": "mongodb", "table": "common_tab"},

With the below query:

{
    "size": 0,
    "aggs": {
        "table_grouped": {
          "terms": {
            "field": "table",
            "size": 100000
          }
        }
      }
}

I get distinct table values, something like below:

    {..., 'aggregations': {'table_grouped': {'doc_count_error_upper_bound': 0, 'sum_other_doc_count': 0, 
'buckets': [{'key': 'sales_tab', 'doc_count': 3}, {'key': 'dept_tab', 'doc_count': 1}, 
{'key': 'new_tab', 'doc_count': 1}, {'key': 'cust_tab', 'doc_count': 1}, 
{'key': 'store_tab', 'doc_count': 1}, {'key': 'trasit_tab', 'doc_count': 1}, 
{'key': 'common_tab', 'doc_count': 2}]}}}

But what I actually want is:

{..., 'aggregations': {'table_grouped': {'doc_count_error_upper_bound': 0, 'sum_other_doc_count': 0, 
'buckets': [{'key': 'sales_tab', 'doc_count': 2, "database": "postgres"}, {'key': 'dept_tab', 
'doc_count': 1, "database": "mongodb"}, {'key': 'new_tab', 'doc_count': 1, 
"database": "mysql"}, {'key': 'cust_tab', 'doc_count': 1, "database": "postgres"}, 
{'key': 'store_tab', 'doc_count': 1, "database": "oracle"}, {'key': 'trasit_tab', 'doc_count': 1, "database": "mysql"}, 
{'key': 'common_tab', 'doc_count': 2, "database": "mongodb"}}]}}}

I want to know from which database is this table coming from, not just {'key': 'sales_tab', 'doc_count': 2} like extra key: value of database {'key': 'sales_tab', 'doc_count': 2, "database": "postgres"} value in buckets result or any other solution which will give distinct table along with the database it is coming from.

How do I achieve it?

Advertisement

Answer

You can use sub aggregation for getting database name as shown below:

{
  "size": 0,
  "aggs": {
    "table_grouped": {
      "terms": {
        "field": "table",
        "size": 10
      },
      "aggs": {
        "database": {
          "terms": {
            "field": "database",
            "size": 10
          }
        }
      }
    }
  }
}

This will generate response as shown below:

"aggregations": {
    "table_grouped": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "common_tab",
          "doc_count": 2,
          "database": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "mongodb",
                "doc_count": 2
              }
            ]
          }
        },
        {
          "key": "sales_tab",
          "doc_count": 2,
          "database": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "postgres",
                "doc_count": 2
              }
            ]
          }
        },
        {
          "key": "cust_tab",
          "doc_count": 1,
          "database": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "postgres",
                "doc_count": 1
              }
            ]
          }
        },
        {
          "key": "dept_tab",
          "doc_count": 1,
          "database": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "mongodb",
                "doc_count": 1
              }
            ]
          }
        },
        {
          "key": "new_tab",
          "doc_count": 1,
          "database": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "mysql",
                "doc_count": 1
              }
            ]
          }
        },
        {
          "key": "store_tab",
          "doc_count": 1,
          "database": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "oracle",
                "doc_count": 1
              }
            ]
          }
        },
        {
          "key": "trasit_tab",
          "doc_count": 1,
          "database": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "mysql",
                "doc_count": 1
              }
            ]
          }
        }
      ]
    }
  }
Advertisement