Skip to content

Grouping

Grouping (also known as field collapsing) collects search results that share a field value into groups. It is useful for de-duplication, entity grouping, or showing representative results from each category.

Solr Documentation: Result Grouping

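Configuration Approaches

Grouping can be configured in two ways: by passing a GroupParamsConfig in the parser's configs list, or by chaining .group() on the parser. The two snippets below apply the same settings, first with the config object and then with chaining.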

from taiyo.parsers import ExtendedDisMaxQueryParser
from taiyo.params import GroupParamsConfig

parser = ExtendedDisMaxQueryParser(
    query="python programming",
    query_fields={"title": 2.0, "content": 1.0},
    configs=[GroupParamsConfig(by="author", limit=3, ngroups=True)],
)
from taiyo.parsers import ExtendedDisMaxQueryParser

parser = ExtendedDisMaxQueryParser(
    query="python programming", query_fields={"title": 2.0, "content": 1.0}
).group(by="author", limit=3, ngroups=True)

Basic Usage

parser = ExtendedDisMaxQueryParser(
    query="python", query_fields={"title": 2.0, "content": 1.0}
).group(
    by="author",  # Group by author field
    limit=3,  # Top 3 docs per group
    ngroups=True,  # Return number of groups
)

results = client.search(parser)

# Access grouped results (typed)
if results.grouping:
    author_group = results.grouping.grouped["author"]
    for group in author_group.groups:
        author = group.group_value
        docs = group.doclist["docs"]
        print(f"\n{author} ({len(docs)} results)")
        for doc in docs:
            print(f"  - {doc['title']}")

Key Parameters

GroupParamsConfig(
    by="author",  # Field to group by (single-valued, indexed)
    limit=3,  # Docs per group
    offset=0,  # Offset within groups
    sort="score desc",  # Sort within groups
    format="grouped",  # Response format: grouped or simple
    main=False,  # Use main result list
    ngroups=True,  # Return total group count
    truncate=False,  # Truncate facets to group leaders
    facet=False,  # Enable group-aware faceting
    # Query-based grouping
    query=["category:programming", "category:databases"],  # Custom query groups
    # Function-based grouping (not supported in SolrCloud)
    func="floor(price)",  # Group by function result
)

Refer to the Apache Solr documentation for the full list of parameters and defaults.

Handling Results

Grouping responses are available through SolrResponse.grouping:

  • SolrResponse.docs: Flattened list of all documents across groups
  • SolrResponse.grouping.grouped[field_name]: Grouped results by field (typed)
  • Each group is a SolrGroup with a group_value and a doclist (containing the nested docs and counts such as numFound)
  • SolrResponse.grouping.grouped[field_name].ngroups: Total number of unique groups; None unless ngroups=True was requested

Example:

parser = ExtendedDisMaxQueryParser(
    query="python", query_fields={"title": 2.0, "content": 1.0}
).group(
    by="author",
    limit=3,
    sort="year desc",
    ngroups=True,
)

results = client.search(parser)

if results.grouping:
    author_group = results.grouping.grouped["author"]
    # Total number of groups
    if author_group.ngroups is not None:
        print(f"Found {author_group.ngroups} authors\n")
    # Iterate through groups
    for group in author_group.groups:
        author = group.group_value
        doclist = group.doclist
        total = doclist["numFound"]
        docs = doclist["docs"]
        print(f"\n{author} ({total} total)")
        for doc in docs:
            print(f"  - {doc['title']}")

# Query-based grouping example
parser = ExtendedDisMaxQueryParser(
    query="technology", query_fields={"content": 1.0}
).group(
    query=["category:programming", "category:databases"],
    limit=5,
)

results = client.search(parser)

if results.grouping:
    for query_str in ["category:programming", "category:databases"]:
        group = results.grouping.grouped[query_str]
        print(f"{query_str}: {group.matches} matches")

Next Steps