fix(nginx_log): repair advanced search filters and status faceting (#1687)

The advanced log search filters were almost entirely non-functional:

- The status filter issued a text term query against the numeric
  "status" field, so it never matched any document.
- The IP and method filters used the field names "remote_addr" and
  "request_method", which do not exist in the index ("ip" / "method").
- The path, user agent, referer, browser, OS and device filters were
  never wired into the query builder.

Faceting on the numeric "status" field also used a terms facet, which
cannot bucket a numeric field and produced garbage prefix-coded terms.

Changes:
- Correct the IP and method field names in the query builder.
- Match status codes with inclusive numeric range queries.
- Wire in the missing path / user agent / referer (match phrase) and
  browser / OS / device (term) filters.
- Split comma-joined browser/OS/device values in the search handler so
  multi-select works.
- Facet the numeric status field with numeric ranges so the status
  code distribution is accurate.
- Add regression tests covering every filter and the status facet
  against a real Bleve index.

Closes #1669

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Jacky
2026-05-22 14:23:26 +08:00
committed by GitHub
parent bb15da5c70
commit c4259c15e2
5 changed files with 357 additions and 18 deletions
+21 -7
View File
@@ -4,6 +4,7 @@ import (
"context"
"net/http"
"sort"
"strings"
"time"
"github.com/0xJacky/Nginx-UI/internal/nginx"
@@ -163,6 +164,19 @@ func GetLogPreflight(c *gin.Context) {
c.JSON(http.StatusOK, response)
}
// splitCommaSeparated splits a comma-joined filter value (as produced by the
// frontend multi-select inputs) into a slice of trimmed, non-empty values.
func splitCommaSeparated(value string) []string {
parts := strings.Split(value, ",")
result := make([]string, 0, len(parts))
for _, part := range parts {
if trimmed := strings.TrimSpace(part); trimmed != "" {
result = append(result, trimmed)
}
}
return result
}
// AdvancedSearchLogs provides advanced search capabilities for logs
func AdvancedSearchLogs(c *gin.Context) {
var req AdvancedSearchRequest
@@ -270,13 +284,13 @@ func AdvancedSearchLogs(c *gin.Context) {
searchReq.Referers = []string{req.Referer}
}
if req.Browser != "" {
searchReq.Browsers = []string{req.Browser}
searchReq.Browsers = splitCommaSeparated(req.Browser)
}
if req.OS != "" {
searchReq.OSs = []string{req.OS}
searchReq.OSs = splitCommaSeparated(req.OS)
}
if req.Device != "" {
searchReq.Devices = []string{req.Device}
searchReq.Devices = splitCommaSeparated(req.Device)
}
if len(req.Status) > 0 {
searchReq.StatusCodes = req.Status
@@ -663,7 +677,7 @@ func GetWorldMapData(c *gin.Context) {
EndTime: req.EndTime,
LogPath: req.Path,
LogPaths: []string{req.Path}, // Use single main log path
UseMainLogPath: true, // Use main_log_path field for efficient queries
UseMainLogPath: true, // Use main_log_path field for efficient queries
Limit: req.Limit,
}
logger.Debugf("WorldMapData - GeoQueryRequest: %+v", geoReq)
@@ -764,7 +778,7 @@ func GetChinaMapData(c *gin.Context) {
EndTime: req.EndTime,
LogPath: req.Path,
LogPaths: []string{req.Path}, // Use single main log path
UseMainLogPath: true, // Use main_log_path field for efficient queries
UseMainLogPath: true, // Use main_log_path field for efficient queries
Limit: req.Limit,
}
logger.Debugf("ChinaMapData - GeoQueryRequest: %+v", geoReq)
@@ -863,7 +877,7 @@ func GetGeoStats(c *gin.Context) {
EndTime: req.EndTime,
LogPath: req.Path,
LogPaths: []string{req.Path}, // Use single main log path
UseMainLogPath: true, // Use main_log_path field for efficient queries
UseMainLogPath: true, // Use main_log_path field for efficient queries
Limit: req.Limit,
}
@@ -878,7 +892,7 @@ func GetGeoStats(c *gin.Context) {
for i, stat := range stats {
statsInterface[i] = stat
}
c.JSON(http.StatusOK, GeoStatsResponse{
Stats: statsInterface,
})
+30
View File
@@ -0,0 +1,30 @@
package nginx_log
import (
"testing"
"github.com/stretchr/testify/assert"
)
// TestSplitCommaSeparated verifies that comma-joined filter values produced by
// the frontend multi-select inputs (browser/os/device) are split back into
// individual values so each can be matched independently.
func TestSplitCommaSeparated(t *testing.T) {
tests := []struct {
name string
input string
want []string
}{
{"single value", "Chrome", []string{"Chrome"}},
{"multiple values", "Chrome,Firefox", []string{"Chrome", "Firefox"}},
{"values containing spaces", "Internet Explorer,Samsung Browser", []string{"Internet Explorer", "Samsung Browser"}},
{"trims surrounding whitespace", " Chrome , Firefox ", []string{"Chrome", "Firefox"}},
{"drops empty segments", "Chrome,,Firefox,", []string{"Chrome", "Firefox"}},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
assert.Equal(t, tt.want, splitCommaSeparated(tt.input))
})
}
}
+95 -11
View File
@@ -3,7 +3,6 @@ package searcher
import (
"encoding/json"
"fmt"
"strconv"
"strings"
"github.com/blevesearch/bleve/v2"
@@ -64,7 +63,7 @@ func (qb *QueryBuilder) BuildQuery(req *SearchRequest) (query.Query, error) {
// Add IP address filters
if len(req.IPAddresses) > 0 {
if ipQuery := qb.buildTermsQuery("remote_addr", req.IPAddresses); ipQuery != nil {
if ipQuery := qb.buildTermsQuery("ip", req.IPAddresses); ipQuery != nil {
boolQuery.AddMust(ipQuery)
}
}
@@ -78,7 +77,7 @@ func (qb *QueryBuilder) BuildQuery(req *SearchRequest) (query.Query, error) {
// Add method filters
if len(req.Methods) > 0 {
if methodQuery := qb.buildTermsQuery("request_method", req.Methods); methodQuery != nil {
if methodQuery := qb.buildTermsQuery("method", req.Methods); methodQuery != nil {
boolQuery.AddMust(methodQuery)
}
}
@@ -90,12 +89,54 @@ func (qb *QueryBuilder) BuildQuery(req *SearchRequest) (query.Query, error) {
}
}
// Add request path filters
if len(req.Paths) > 0 {
if pathQuery := qb.buildMatchPhraseQuery("path", req.Paths); pathQuery != nil {
boolQuery.AddMust(pathQuery)
}
}
// Add user agent filters
if len(req.UserAgents) > 0 {
if uaQuery := qb.buildMatchPhraseQuery("user_agent", req.UserAgents); uaQuery != nil {
boolQuery.AddMust(uaQuery)
}
}
// Add referer filters
if len(req.Referers) > 0 {
if refererQuery := qb.buildMatchPhraseQuery("referer", req.Referers); refererQuery != nil {
boolQuery.AddMust(refererQuery)
}
}
// Add browser filters
if len(req.Browsers) > 0 {
if browserQuery := qb.buildTermsQuery("browser", req.Browsers); browserQuery != nil {
boolQuery.AddMust(browserQuery)
}
}
// Add operating system filters
if len(req.OSs) > 0 {
if osQuery := qb.buildTermsQuery("os", req.OSs); osQuery != nil {
boolQuery.AddMust(osQuery)
}
}
// Add device type filters
if len(req.Devices) > 0 {
if deviceQuery := qb.buildTermsQuery("device_type", req.Devices); deviceQuery != nil {
boolQuery.AddMust(deviceQuery)
}
}
// Log the final query structure for debugging.
queryBytes, err := json.Marshal(boolQuery)
if err == nil {
logger.Debugf("Constructed Bleve Query: %s", string(queryBytes))
}
// Additional debug: if using main_log_path, also log a diagnostic query without filters
if len(req.LogPaths) > 0 && req.UseMainLogPath {
logger.Debugf("DEBUG: Dashboard query using main_log_path field with path: %v", req.LogPaths)
@@ -150,19 +191,63 @@ func (qb *QueryBuilder) buildTermsQuery(field string, terms []string) query.Quer
return boolQuery
}
// buildStatusCodeQuery builds a status code query
// buildMatchPhraseQuery builds a match phrase query for analyzed text fields.
// A phrase query matches the analyzed terms in order, which keeps filters such
// as request path precise instead of loosely matching any single shared token.
// Multiple values are combined with boolean OR.
func (qb *QueryBuilder) buildMatchPhraseQuery(field string, values []string) query.Query {
if len(values) == 0 {
return nil
}
if len(values) == 1 {
phraseQuery := bleve.NewMatchPhraseQuery(values[0])
phraseQuery.SetField(field)
return phraseQuery
}
// For multiple values, use boolean OR
boolQuery := bleve.NewBooleanQuery()
for _, value := range values {
phraseQuery := bleve.NewMatchPhraseQuery(value)
phraseQuery.SetField(field)
boolQuery.AddShould(phraseQuery)
}
boolQuery.SetMinShould(1) // Crucial for OR behavior
return boolQuery
}
// buildStatusCodeQuery builds a status code query.
// The status field is indexed as a numeric field, so each code is matched
// with an inclusive numeric range query instead of a text term query.
func (qb *QueryBuilder) buildStatusCodeQuery(codes []int) query.Query {
if len(codes) == 0 {
return nil
}
// Convert status codes to strings for term query
terms := make([]string, len(codes))
for i, code := range codes {
terms[i] = strconv.Itoa(code)
if len(codes) == 1 {
return qb.buildStatusCodeRangeQuery(codes[0])
}
return qb.buildTermsQuery("status", terms)
// For multiple status codes, use boolean OR
boolQuery := bleve.NewBooleanQuery()
for _, code := range codes {
boolQuery.AddShould(qb.buildStatusCodeRangeQuery(code))
}
boolQuery.SetMinShould(1) // Crucial for OR behavior
return boolQuery
}
// buildStatusCodeRangeQuery builds an inclusive numeric range query that
// matches exactly one status code on the numeric "status" field.
func (qb *QueryBuilder) buildStatusCodeRangeQuery(code int) query.Query {
value := float64(code)
inclusive := true
rangeQuery := bleve.NewNumericRangeInclusiveQuery(&value, &value, &inclusive, &inclusive)
rangeQuery.SetField("status")
return rangeQuery
}
// ValidateSearchRequest validates a search request
@@ -179,7 +264,6 @@ func (qb *QueryBuilder) ValidateSearchRequest(req *SearchRequest) error {
return fmt.Errorf("offset cannot be negative")
}
// Validate sort order
if req.SortOrder != "" && req.SortOrder != SortOrderAsc && req.SortOrder != SortOrderDesc {
return fmt.Errorf("invalid sort order: %s", req.SortOrder)
+55
View File
@@ -4,6 +4,7 @@ import (
"context"
"encoding/json"
"fmt"
"strconv"
"sync"
"sync/atomic"
"time"
@@ -284,6 +285,21 @@ func (s *Searcher) convertBleveResult(bleveResult *bleve.SearchResult) *SearchRe
// This addresses the issue where Bleve may incorrectly aggregate Total values
// across multiple shards in IndexAlias
convertedFacet.Total = len(facetTerms)
} else if len(facet.NumericRanges) > 0 {
// Numeric range facets (e.g. the numeric status field) report their
// buckets as NumericRanges. Expose non-empty ranges as terms so all
// facets can be consumed uniformly.
convertedFacet.Terms = make([]*FacetTerm, 0, len(facet.NumericRanges))
for _, nr := range facet.NumericRanges {
if nr.Count == 0 {
continue
}
convertedFacet.Terms = append(convertedFacet.Terms, &FacetTerm{
Term: nr.Name,
Count: nr.Count,
})
}
convertedFacet.Total = len(convertedFacet.Terms)
} else {
// If there are no terms, Total should be 0
convertedFacet.Total = 0
@@ -341,6 +357,11 @@ func (s *Searcher) configureSearchRequest(searchReq *bleve.SearchRequest, req *S
size = req.FacetSize
}
facet := bleve.NewFacetRequest(field, size)
// The status field is indexed as numeric, so a terms facet cannot
// bucket it. Add one numeric range per HTTP status code instead.
if field == "status" {
addStatusCodeRanges(facet)
}
searchReq.AddFacet(field, facet)
}
}
@@ -353,6 +374,40 @@ func (s *Searcher) configureSearchRequest(searchReq *bleve.SearchRequest, req *S
}
}
// statusCodeFacetRange is a precomputed numeric facet bucket for one HTTP
// status code.
type statusCodeFacetRange struct {
name string
min float64
max float64
}
// statusCodeFacetRanges holds one numeric range per HTTP status code (100-599).
// It is built once at startup so that faceting the numeric status field does
// not rebuild the full range set on every search request.
var statusCodeFacetRanges = func() []statusCodeFacetRange {
ranges := make([]statusCodeFacetRange, 0, 500)
for code := 100; code <= 599; code++ {
ranges = append(ranges, statusCodeFacetRange{
name: strconv.Itoa(code),
min: float64(code),
max: float64(code + 1),
})
}
return ranges
}()
// addStatusCodeRanges attaches one numeric range per HTTP status code to the
// facet request. The status field is indexed as numeric, so it must be faceted
// with numeric ranges rather than terms; each range is named after the status
// code so the converted facet exposes the code itself as the term.
func addStatusCodeRanges(facet *bleve.FacetRequest) {
for i := range statusCodeFacetRanges {
r := &statusCodeFacetRanges[i]
facet.AddNumericRange(r.name, &r.min, &r.max)
}
}
// Utility methods
func (s *Searcher) setRequestDefaults(req *SearchRequest) {
@@ -0,0 +1,156 @@
package searcher
import (
"context"
"testing"
"time"
"github.com/0xJacky/Nginx-UI/internal/nginx_log/indexer"
"github.com/blevesearch/bleve/v2"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// filterTestDoc describes a single log document used by the filter tests.
type filterTestDoc struct {
id string
fields map[string]interface{}
}
// newFilterTestSearcher builds an in-memory Bleve index using the production
// log index mapping, indexes a fixed set of documents, and returns a Searcher
// over that index. This lets the filter tests exercise the real query/index
// behavior instead of only checking that a query object is non-nil.
func newFilterTestSearcher(t *testing.T) *Searcher {
t.Helper()
index, err := bleve.NewMemOnly(indexer.CreateLogIndexMapping())
require.NoError(t, err)
baseTime := time.Date(2025, 1, 1, 12, 0, 0, 0, time.UTC).Unix()
docs := []filterTestDoc{
{
id: "doc1",
fields: map[string]interface{}{
"timestamp": baseTime, "ip": "192.168.1.1", "method": "GET", "status": 200,
"path": "/api/products", "path_exact": "/api/products", "bytes_sent": 1024,
"browser": "Chrome", "os": "Windows", "device_type": "Desktop",
"referer": "examplereferer", "user_agent": "chromeua",
"file_path": "/var/log/nginx/access.log", "main_log_path": "/var/log/nginx/access.log",
"raw": "doc1",
},
},
{
id: "doc2",
fields: map[string]interface{}{
"timestamp": baseTime, "ip": "192.168.1.2", "method": "POST", "status": 200,
"path": "/api/login", "path_exact": "/api/login", "bytes_sent": 2048,
"browser": "Firefox", "os": "Ubuntu", "device_type": "Desktop",
"referer": "otherreferer", "user_agent": "firefoxua",
"file_path": "/var/log/nginx/access.log", "main_log_path": "/var/log/nginx/access.log",
"raw": "doc2",
},
},
{
id: "doc3",
fields: map[string]interface{}{
"timestamp": baseTime, "ip": "10.0.0.1", "method": "GET", "status": 404,
"path": "/missing/page", "path_exact": "/missing/page", "bytes_sent": 512,
"browser": "Chrome", "os": "Android", "device_type": "Mobile",
"referer": "otherreferer", "user_agent": "chromeua",
"file_path": "/var/log/nginx/access.log", "main_log_path": "/var/log/nginx/access.log",
"raw": "doc3",
},
},
{
id: "doc4",
fields: map[string]interface{}{
"timestamp": baseTime, "ip": "10.0.0.2", "method": "DELETE", "status": 500,
"path": "/error", "path_exact": "/error", "bytes_sent": 256,
"browser": "Safari", "os": "iOS", "device_type": "Mobile",
"referer": "otherreferer", "user_agent": "safariua",
"file_path": "/var/log/nginx/access.log", "main_log_path": "/var/log/nginx/access.log",
"raw": "doc4",
},
},
}
for _, doc := range docs {
require.NoError(t, index.Index(doc.id, doc.fields))
}
// Searcher.Stop does not close the underlying shards, so close the
// in-memory index here to avoid leaking it across tests.
t.Cleanup(func() { _ = index.Close() })
config := DefaultSearcherConfig()
config.EnableCache = false
return NewSearcher(config, []bleve.Index{index})
}
// TestSearcherFieldFilters verifies that every advanced-search filter narrows
// the result set to the matching documents. It is a regression test for the
// log filter returning no entries (GitHub issue #1669).
func TestSearcherFieldFilters(t *testing.T) {
s := newFilterTestSearcher(t)
defer func() { _ = s.Stop() }()
tests := []struct {
name string
req *SearchRequest
wantIDs []string
}{
{"status code 200", &SearchRequest{StatusCodes: []int{200}}, []string{"doc1", "doc2"}},
{"status code 404", &SearchRequest{StatusCodes: []int{404}}, []string{"doc3"}},
{"multiple status codes", &SearchRequest{StatusCodes: []int{200, 500}}, []string{"doc1", "doc2", "doc4"}},
{"ip address", &SearchRequest{IPAddresses: []string{"192.168.1.1"}}, []string{"doc1"}},
{"http method", &SearchRequest{Methods: []string{"GET"}}, []string{"doc1", "doc3"}},
{"browser", &SearchRequest{Browsers: []string{"Chrome"}}, []string{"doc1", "doc3"}},
{"operating system", &SearchRequest{OSs: []string{"Android"}}, []string{"doc3"}},
{"device type", &SearchRequest{Devices: []string{"Mobile"}}, []string{"doc3", "doc4"}},
// doc1 path "/api/products" shares the "api" token with "/api/login",
// so this also verifies the path filter matches the token sequence
// rather than loosely matching any single shared token.
{"request path", &SearchRequest{Paths: []string{"/api/login"}}, []string{"doc2"}},
{"referer", &SearchRequest{Referers: []string{"examplereferer"}}, []string{"doc1"}},
{"user agent", &SearchRequest{UserAgents: []string{"chromeua"}}, []string{"doc1", "doc3"}},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
tt.req.Limit = 100
result, err := s.Search(context.Background(), tt.req)
require.NoError(t, err)
gotIDs := make([]string, 0, len(result.Hits))
for _, hit := range result.Hits {
gotIDs = append(gotIDs, hit.ID)
}
assert.ElementsMatch(t, tt.wantIDs, gotIDs,
"filter %q should return only the matching documents", tt.name)
})
}
}
// TestSearcherStatusFacet verifies that faceting on the numeric status field
// produces correct per-code buckets. A plain terms facet cannot bucket a
// numeric field, so the searcher must facet it with numeric ranges.
func TestSearcherStatusFacet(t *testing.T) {
s := newFilterTestSearcher(t)
defer func() { _ = s.Stop() }()
result, err := s.Search(context.Background(), &SearchRequest{
Limit: 100,
IncludeFacets: true,
FacetFields: []string{"status"},
})
require.NoError(t, err)
statusFacet, ok := result.Facets["status"]
require.True(t, ok, "status facet should be present in the result")
got := make(map[string]int)
for _, term := range statusFacet.Terms {
got[term.Term] = term.Count
}
assert.Equal(t, map[string]int{"200": 2, "404": 1, "500": 1}, got)
}