mirror of
https://github.com/navidrome/navidrome.git
synced 2026-06-19 07:37:15 +00:00
fix(opensubsonic): make search3 empty-query pagination fast at large offsets (#5601)
* fix(subsonic): make search3 empty-query pagination fast at large offsets Empty-query search3 (used by clients like Symfonium to sync the whole library) degraded linearly with songOffset: the offset optimization in optimizePagination keeps the original query's LEFT JOINs (annotation, bookmark, library) inside its rowid NOT IN subquery, making it as slow as plain OFFSET (~5s per page at offset 900K on a 920K-track library). Rewrite the empty-query branch of doSearch to use the same two-phase approach as the FTS search: Phase 1 paginates rowids on the bare main table, which SQLite satisfies with a covering index at any offset; Phase 2 hydrates only the page's rows with all JOINs. The Phase 2 hydration logic is extracted into hydrateRowidPage, now shared with ftsSearch.execute. Also replace the media_file_missing index with a composite covering index on (missing, library_id), so Phase 1 stays covering for non-admin users, whose queries include a library_id filter. The composite serves all missing-only lookups via its prefix. With a 920K-track / 85K-album test library, search3 empty-query responses are now flat (~0.1s) at every offset, for both admin and non-admin users (previously 3-5s at offsets above 600K). * refactor(persistence): share search Phase 1 contract and dedup junction fan-out Extract the Phase 1 query assembly that was duplicated between the FTS search and the empty-query search into executeTwoPhase: both paths now supply only their strategy-specific FROM/JOINs and ORDER BY, while the shared contract (missing filter, library access, options.Filters, and Max/Offset semantics) lives in one place. Also fix a pagination integrity bug: the artist library filter joins the library_artist junction table, so an artist present in multiple libraries produced duplicate rowids in Phase 1, corrupting offset-based pagination (short pages and repeated artists during full-library syncs). Phase 1 now applies DISTINCT whenever a junction-based LibraryFilter is set. DISTINCT is used instead of GROUP BY because bm25() cannot be evaluated in a grouped query; plain-filter tables (media_file, album) skip the dedup so their Phase 1 keeps the streaming covering-index plan. This also fixes the same duplication in the pre-existing FTS search path. * fix(persistence): pin artist search Phase 1 join order with CROSS JOIN search3 always filters artists by library (library_artist.library_id IN ...), and with the junction JOIN in the search Phase 1 rowid query SQLite chose to drive from library_artist, sorting every junction row with a temp b-tree on each page — a flat ~200ms penalty per request at 405K artists, even at offset 0 (the previous code avoided this by accident: its GROUP BY artist.id pinned an artist-driven plan). Use CROSS JOIN (SQLite's explicit join-order override) in a search-only variant of the artist library filter, keeping artist as the outer table so Phase 1 streams rowids in artist.id order from the primary key index and LIMIT/OFFSET short-circuits. The DISTINCT dedup stays and costs nothing under the streaming plan. Other artist queries keep the planner's freedom. With 405K artists, empty-query artist search is now 0.07s at offset 0 and 0.25s at offset 399K end-to-end (was 0.31s/0.34s before this fix, and up to 1.2s on master at deep offsets). Artist FTS text search is unaffected.
This commit is contained in:
@@ -0,0 +1,13 @@
|
||||
-- +goose Up
|
||||
-- Covering index for the rowid-only pagination query used by search3 with an empty query
|
||||
-- (full library sync). It must cover both `missing` and `library_id` so SQLite never touches
|
||||
-- the (wide) media_file rows while skipping over large offsets.
|
||||
-- Replaces media_file_missing: the composite serves all `missing = ?` lookups via its prefix.
|
||||
create index if not exists media_file_missing_library_id
|
||||
on media_file(missing, library_id);
|
||||
drop index if exists media_file_missing;
|
||||
|
||||
-- +goose Down
|
||||
create index if not exists media_file_missing
|
||||
on media_file(missing);
|
||||
drop index if exists media_file_missing_library_id;
|
||||
@@ -540,13 +540,28 @@ func (r *artistRepository) RefreshStats(allArtists bool) (int64, error) {
|
||||
return totalRowsAffected, nil
|
||||
}
|
||||
|
||||
// applyLibraryFilterToSearchQuery is applyLibraryFilterToArtistQuery with the join order
|
||||
// pinned via CROSS JOIN (SQLite's explicit join-order override): the search Phase 1 paginates
|
||||
// rowids by artist.id, and when the planner drives from library_artist it must sort every
|
||||
// junction row on every page (temp b-tree over the whole table). Keeping artist as the outer
|
||||
// table streams rows in artist.id order from its primary key index, so LIMIT/OFFSET
|
||||
// short-circuits. Search-only: other artist queries keep the planner's freedom.
|
||||
func (r *artistRepository) applyLibraryFilterToSearchQuery(query SelectBuilder) SelectBuilder {
|
||||
user := loggedUser(r.ctx)
|
||||
query = query.CrossJoin("library_artist on library_artist.artist_id = artist.id")
|
||||
if user.ID != invalidUserId && !user.IsAdmin {
|
||||
query = query.Join("user_library on user_library.library_id = library_artist.library_id AND user_library.user_id = ?", user.ID)
|
||||
}
|
||||
return query
|
||||
}
|
||||
|
||||
func (r *artistRepository) searchCfg() searchConfig {
|
||||
return searchConfig{
|
||||
// Natural order for artists is more performant by ID, due to GROUP BY clause in selectArtist
|
||||
NaturalOrder: "artist.id",
|
||||
OrderBy: []string{"sum(json_extract(stats, '$.total.m')) desc", "name"},
|
||||
MBIDFields: []string{"mbz_artist_id"},
|
||||
LibraryFilter: r.applyLibraryFilterToArtistQuery,
|
||||
LibraryFilter: r.applyLibraryFilterToSearchQuery,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@ package persistence
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
@@ -594,6 +595,66 @@ var _ = Describe("ArtistRepository", func() {
|
||||
})
|
||||
})
|
||||
|
||||
Context("Empty Query (sync pagination)", func() {
|
||||
It("does not duplicate artists that belong to multiple libraries", func() {
|
||||
// An artist in two libraries has two library_artist rows; pagination
|
||||
// must still enumerate it exactly once, at a stable offset.
|
||||
Expect(lr.AddArtist(lib2.ID, artistBeatles.ID)).To(Succeed())
|
||||
|
||||
all, err := repo.Search("", model.QueryOptions{Max: 1000})
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
seen := map[string]bool{}
|
||||
var paged model.Artists
|
||||
for offset := range len(all) {
|
||||
page, err := repo.Search("", model.QueryOptions{Max: 1, Offset: offset})
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
for _, a := range page {
|
||||
Expect(seen[a.ID]).To(BeFalse(), fmt.Sprintf("artist %s returned twice", a.ID))
|
||||
seen[a.ID] = true
|
||||
}
|
||||
paged = append(paged, page...)
|
||||
}
|
||||
Expect(paged).To(HaveLen(len(all)))
|
||||
})
|
||||
|
||||
It("paginates all artists in natural order without overlaps or gaps", func() {
|
||||
all, err := repo.Search("", model.QueryOptions{Max: 1000})
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(len(all)).To(BeNumerically(">", 1))
|
||||
|
||||
var paged model.Artists
|
||||
pageSize := 2
|
||||
for offset := 0; offset < len(all); offset += pageSize {
|
||||
page, err := repo.Search("", model.QueryOptions{Max: pageSize, Offset: offset})
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
paged = append(paged, page...)
|
||||
}
|
||||
Expect(paged).To(HaveLen(len(all)))
|
||||
for i := range all {
|
||||
Expect(paged[i].ID).To(Equal(all[i].ID))
|
||||
}
|
||||
})
|
||||
|
||||
It("respects library filtering for restricted users", func() {
|
||||
// Create an artist only in library 2 (not accessible to restricted user)
|
||||
lib2Artist := model.Artist{ID: "empty-query-lib2-artist", Name: "Empty Query Lib2 Artist"}
|
||||
Expect(repo.Put(&lib2Artist)).To(Succeed())
|
||||
Expect(lr.AddArtist(lib2.ID, lib2Artist.ID)).To(Succeed())
|
||||
|
||||
results, err := restrictedRepo.Search("", model.QueryOptions{Max: 1000})
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
for _, a := range results {
|
||||
Expect(a.ID).ToNot(Equal(lib2Artist.ID), "Empty query search should respect library filtering")
|
||||
}
|
||||
|
||||
// Clean up
|
||||
if raw, ok := repo.(*artistRepository); ok {
|
||||
_, _ = raw.executeSQL(squirrel.Delete(raw.tableName).Where(squirrel.Eq{"id": lib2Artist.ID}))
|
||||
}
|
||||
})
|
||||
})
|
||||
|
||||
Context("Headless Processes (No User Context)", func() {
|
||||
It("should see all artists from all libraries when no user is in context", func() {
|
||||
// Add artists to different libraries
|
||||
|
||||
@@ -652,6 +652,49 @@ var _ = Describe("MediaRepository", func() {
|
||||
_, _ = raw.executeSQL(squirrel.Delete(raw.tableName).Where(squirrel.Eq{"id": missingMediaFile.ID}))
|
||||
})
|
||||
})
|
||||
|
||||
Context("empty query (natural order pagination)", func() {
|
||||
It("returns all non-missing files in natural order", func() {
|
||||
results, err := mr.Search("", model.QueryOptions{Max: 1000})
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(results).ToNot(BeEmpty())
|
||||
for _, result := range results {
|
||||
Expect(result.Missing).To(BeFalse())
|
||||
}
|
||||
})
|
||||
|
||||
It(`treats quoted empty query ("") the same as empty`, func() {
|
||||
all, err := mr.Search("", model.QueryOptions{Max: 1000})
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
quoted, err := mr.Search(`""`, model.QueryOptions{Max: 1000})
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(quoted).To(HaveLen(len(all)))
|
||||
})
|
||||
|
||||
It("paginates without overlaps or gaps", func() {
|
||||
all, err := mr.Search("", model.QueryOptions{Max: 1000})
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(len(all)).To(BeNumerically(">", 3))
|
||||
|
||||
var paged model.MediaFiles
|
||||
pageSize := 3
|
||||
for offset := 0; offset < len(all); offset += pageSize {
|
||||
page, err := mr.Search("", model.QueryOptions{Max: pageSize, Offset: offset})
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
paged = append(paged, page...)
|
||||
}
|
||||
Expect(paged).To(HaveLen(len(all)))
|
||||
for i := range all {
|
||||
Expect(paged[i].ID).To(Equal(all[i].ID), fmt.Sprintf("row %d differs", i))
|
||||
}
|
||||
})
|
||||
|
||||
It("returns empty page when offset is beyond the total", func() {
|
||||
results, err := mr.Search("", model.QueryOptions{Max: 10, Offset: 100000})
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(results).To(BeEmpty())
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
Describe("FindByPaths", func() {
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package persistence
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
. "github.com/Masterminds/squirrel"
|
||||
@@ -20,8 +21,10 @@ type searchConfig struct {
|
||||
NaturalOrder string // ORDER BY for empty-query results (e.g. "album.rowid")
|
||||
OrderBy []string // ORDER BY for text search results (e.g. ["name"])
|
||||
MBIDFields []string // columns to match when query is a UUID
|
||||
// LibraryFilter overrides the default applyLibraryFilter for FTS Phase 1.
|
||||
// Needed when library access requires a junction table (e.g. artist → library_artist).
|
||||
// LibraryFilter overrides the default applyLibraryFilter for the rowid Phase 1 of
|
||||
// two-phase searches (FTS and empty-query). Needed when library access goes through a
|
||||
// junction table (e.g. artist → library_artist), whose JOIN can fan out rowids for
|
||||
// entities in multiple libraries — Phase 1 dedups whenever this is set.
|
||||
LibraryFilter func(sq SelectBuilder) SelectBuilder
|
||||
}
|
||||
|
||||
@@ -57,8 +60,8 @@ func (r sqlRepository) doSearch(sq SelectBuilder, q string, results any, cfg sea
|
||||
|
||||
// Empty query (OpenSubsonic `search3?query=""`) — return all in natural order.
|
||||
if q == "" || q == `""` {
|
||||
sq = sq.OrderBy(cfg.NaturalOrder)
|
||||
return r.queryAll(sq, results, options)
|
||||
rowidCore := Select(r.tableName + ".rowid").From(r.tableName).OrderBy(cfg.NaturalOrder)
|
||||
return r.executeTwoPhase(sq, results, rowidCore, cfg, options)
|
||||
}
|
||||
|
||||
// MBID search: if query is a valid UUID, search by MBID fields instead
|
||||
@@ -82,6 +85,53 @@ func (r sqlRepository) doSearch(sq SelectBuilder, q string, results any, cfg sea
|
||||
return strategy.execute(r, sq, results, cfg, options)
|
||||
}
|
||||
|
||||
// executeTwoPhase runs a search in two phases:
|
||||
// - Phase 1: rowidCore (strategy-specific FROM/JOINs and ORDER BY) plus the shared search
|
||||
// contract applied here — non-missing rows only, library access, options.Filters, and
|
||||
// pagination. Keeping Phase 1 free of the full SELECT's JOINs lets SQLite paginate via a
|
||||
// covering index; with those JOINs, large offsets degrade to O(offset) join probes —
|
||||
// multi-second responses on 100k+ libraries.
|
||||
// - Phase 2: full SELECT with all JOINs, scoped to Phase 1's rowid page.
|
||||
func (r sqlRepository) executeTwoPhase(sq SelectBuilder, results any, rowidCore SelectBuilder, cfg searchConfig, options model.QueryOptions) error {
|
||||
rowidQuery := rowidCore.
|
||||
Where(Eq{r.tableName + ".missing": false})
|
||||
if options.Max > 0 {
|
||||
rowidQuery = rowidQuery.Limit(uint64(options.Max))
|
||||
}
|
||||
if options.Offset > 0 {
|
||||
rowidQuery = rowidQuery.Offset(uint64(options.Offset))
|
||||
}
|
||||
if cfg.LibraryFilter != nil {
|
||||
// Junction-table library filters can repeat rowids for entities in multiple
|
||||
// libraries, which would corrupt offset-based pagination — dedup before paginating.
|
||||
// (DISTINCT, not GROUP BY: bm25() can't be evaluated in a grouped query.)
|
||||
rowidQuery = cfg.LibraryFilter(rowidQuery).Distinct()
|
||||
} else {
|
||||
rowidQuery = r.applyLibraryFilter(rowidQuery)
|
||||
}
|
||||
if options.Filters != nil {
|
||||
rowidQuery = rowidQuery.Where(options.Filters)
|
||||
}
|
||||
return r.hydrateRowidPage(sq, rowidQuery, results)
|
||||
}
|
||||
|
||||
// hydrateRowidPage joins sq to the ordered rowid set produced by rowidQuery, preserving its
|
||||
// ordering. rowidQuery must handle pagination itself; sq's LIMIT/OFFSET are stripped.
|
||||
func (r sqlRepository) hydrateRowidPage(sq SelectBuilder, rowidQuery SelectBuilder, results any) error {
|
||||
rowidSQL, rowidArgs, err := rowidQuery.ToSql()
|
||||
if err != nil {
|
||||
return fmt.Errorf("building rowid query: %w", err)
|
||||
}
|
||||
sq = sq.RemoveLimit().RemoveOffset()
|
||||
rankedSubquery := fmt.Sprintf(
|
||||
"(SELECT rowid as _rid, row_number() OVER () AS _rn FROM (%s)) AS _ranked",
|
||||
rowidSQL,
|
||||
)
|
||||
sq = sq.Join(rankedSubquery+" ON "+r.tableName+".rowid = _ranked._rid", rowidArgs...)
|
||||
sq = sq.OrderBy("_ranked._rn")
|
||||
return r.queryAll(sq, results)
|
||||
}
|
||||
|
||||
func mbidExpr(tableName, mbid string, mbidFields ...string) Sqlizer {
|
||||
if uuid.Validate(mbid) != nil || len(mbidFields) == 0 {
|
||||
return nil
|
||||
|
||||
@@ -284,11 +284,9 @@ func (s *ftsSearch) ToSql() (string, []any, error) {
|
||||
return sql, []any{s.matchExpr}, nil
|
||||
}
|
||||
|
||||
// execute runs a two-phase FTS5 search:
|
||||
// - Phase 1: lightweight rowid query (main table + FTS + library filter) for ranking and pagination.
|
||||
// - Phase 2: full SELECT with all JOINs, scoped to Phase 1's rowid set.
|
||||
//
|
||||
// Complex ORDER BY (function calls, aggregations) are dropped from Phase 1.
|
||||
// execute runs a two-phase FTS5 search (see executeTwoPhase): Phase 1 here contributes the
|
||||
// FTS MATCH join and BM25 rank ordering. Complex ORDER BY (function calls, aggregations) are
|
||||
// dropped from Phase 1.
|
||||
func (s *ftsSearch) execute(r sqlRepository, sq SelectBuilder, dest any, cfg searchConfig, options model.QueryOptions) error {
|
||||
qualifiedOrderBys := []string{s.rankExpr}
|
||||
for _, ob := range cfg.OrderBy {
|
||||
@@ -297,45 +295,11 @@ func (s *ftsSearch) execute(r sqlRepository, sq SelectBuilder, dest any, cfg sea
|
||||
}
|
||||
}
|
||||
|
||||
// Phase 1: fresh query — must set LIMIT/OFFSET from options explicitly.
|
||||
// Mirror applyOptions behavior: Max=0 means no limit, not LIMIT 0.
|
||||
rowidQuery := Select(s.tableName+".rowid").
|
||||
rowidCore := Select(s.tableName+".rowid").
|
||||
From(s.tableName).
|
||||
Join(s.ftsTable+" ON "+s.ftsTable+".rowid = "+s.tableName+".rowid AND "+s.ftsTable+" MATCH ?", s.matchExpr).
|
||||
Where(Eq{s.tableName + ".missing": false}).
|
||||
OrderBy(qualifiedOrderBys...)
|
||||
if options.Max > 0 {
|
||||
rowidQuery = rowidQuery.Limit(uint64(options.Max))
|
||||
}
|
||||
if options.Offset > 0 {
|
||||
rowidQuery = rowidQuery.Offset(uint64(options.Offset))
|
||||
}
|
||||
|
||||
// Library filter + musicFolderId must be applied here, before pagination.
|
||||
if cfg.LibraryFilter != nil {
|
||||
rowidQuery = cfg.LibraryFilter(rowidQuery)
|
||||
} else {
|
||||
rowidQuery = r.applyLibraryFilter(rowidQuery)
|
||||
}
|
||||
if options.Filters != nil {
|
||||
rowidQuery = rowidQuery.Where(options.Filters)
|
||||
}
|
||||
|
||||
rowidSQL, rowidArgs, err := rowidQuery.ToSql()
|
||||
if err != nil {
|
||||
return fmt.Errorf("building FTS rowid query: %w", err)
|
||||
}
|
||||
|
||||
// Phase 2: strip LIMIT/OFFSET from sq (Phase 1 handled pagination),
|
||||
// join on the ranked rowid set to hydrate with full columns.
|
||||
sq = sq.RemoveLimit().RemoveOffset()
|
||||
rankedSubquery := fmt.Sprintf(
|
||||
"(SELECT rowid as _rid, row_number() OVER () AS _rn FROM (%s)) AS _ranked",
|
||||
rowidSQL,
|
||||
)
|
||||
sq = sq.Join(rankedSubquery+" ON "+s.tableName+".rowid = _ranked._rid", rowidArgs...)
|
||||
sq = sq.OrderBy("_ranked._rn")
|
||||
return r.queryAll(sq, dest)
|
||||
return r.executeTwoPhase(sq, dest, rowidCore, cfg, options)
|
||||
}
|
||||
|
||||
// qualifyOrderBy prepends tableName to a simple column name. Returns empty string for
|
||||
|
||||
Reference in New Issue
Block a user