fix(opensubsonic): make search3 empty-query pagination fast at large offsets (#5601)

* fix(subsonic): make search3 empty-query pagination fast at large offsets

Empty-query search3 (used by clients like Symfonium to sync the whole
library) degraded linearly with songOffset: the offset optimization in
optimizePagination keeps the original query's LEFT JOINs (annotation,
bookmark, library) inside its rowid NOT IN subquery, making it as slow as
plain OFFSET (~5s per page at offset 900K on a 920K-track library).

Rewrite the empty-query branch of doSearch to use the same two-phase
approach as the FTS search: Phase 1 paginates rowids on the bare main
table, which SQLite satisfies with a covering index at any offset; Phase 2
hydrates only the page's rows with all JOINs. The Phase 2 hydration logic
is extracted into hydrateRowidPage, now shared with ftsSearch.execute.

Also replace the media_file_missing index with a composite covering index
on (missing, library_id), so Phase 1 stays covering for non-admin users,
whose queries include a library_id filter. The composite serves all
missing-only lookups via its prefix.

With a 920K-track / 85K-album test library, search3 empty-query responses
are now flat (~0.1s) at every offset, for both admin and non-admin users
(previously 3-5s at offsets above 600K).

* refactor(persistence): share search Phase 1 contract and dedup junction fan-out

Extract the Phase 1 query assembly that was duplicated between the FTS
search and the empty-query search into executeTwoPhase: both paths now
supply only their strategy-specific FROM/JOINs and ORDER BY, while the
shared contract (missing filter, library access, options.Filters, and
Max/Offset semantics) lives in one place.

Also fix a pagination integrity bug: the artist library filter joins the
library_artist junction table, so an artist present in multiple libraries
produced duplicate rowids in Phase 1, corrupting offset-based pagination
(short pages and repeated artists during full-library syncs). Phase 1 now
applies DISTINCT whenever a junction-based LibraryFilter is set. DISTINCT
is used instead of GROUP BY because bm25() cannot be evaluated in a
grouped query; plain-filter tables (media_file, album) skip the dedup so
their Phase 1 keeps the streaming covering-index plan. This also fixes the
same duplication in the pre-existing FTS search path.

* fix(persistence): pin artist search Phase 1 join order with CROSS JOIN

search3 always filters artists by library (library_artist.library_id IN
...), and with the junction JOIN in the search Phase 1 rowid query SQLite
chose to drive from library_artist, sorting every junction row with a temp
b-tree on each page — a flat ~200ms penalty per request at 405K artists,
even at offset 0 (the previous code avoided this by accident: its GROUP BY
artist.id pinned an artist-driven plan).

Use CROSS JOIN (SQLite's explicit join-order override) in a search-only
variant of the artist library filter, keeping artist as the outer table so
Phase 1 streams rowids in artist.id order from the primary key index and
LIMIT/OFFSET short-circuits. The DISTINCT dedup stays and costs nothing
under the streaming plan. Other artist queries keep the planner's freedom.

With 405K artists, empty-query artist search is now 0.07s at offset 0 and
0.25s at offset 399K end-to-end (was 0.31s/0.34s before this fix, and up
to 1.2s on master at deep offsets). Artist FTS text search is unaffected.
This commit is contained in:
Deluan Quintão
2026-06-12 15:53:37 -04:00
committed by GitHub
parent 3b958dd6a7
commit 5ec6e6a8d4
6 changed files with 192 additions and 46 deletions
@@ -0,0 +1,13 @@
-- +goose Up
-- Covering index for the rowid-only pagination query used by search3 with an empty query
-- (full library sync). It must cover both `missing` and `library_id` so SQLite never touches
-- the (wide) media_file rows while skipping over large offsets.
-- Replaces media_file_missing: the composite serves all `missing = ?` lookups via its prefix.
create index if not exists media_file_missing_library_id
on media_file(missing, library_id);
drop index if exists media_file_missing;
-- +goose Down
create index if not exists media_file_missing
on media_file(missing);
drop index if exists media_file_missing_library_id;
+16 -1
View File
@@ -540,13 +540,28 @@ func (r *artistRepository) RefreshStats(allArtists bool) (int64, error) {
return totalRowsAffected, nil
}
// applyLibraryFilterToSearchQuery is applyLibraryFilterToArtistQuery with the join order
// pinned via CROSS JOIN (SQLite's explicit join-order override): the search Phase 1 paginates
// rowids by artist.id, and when the planner drives from library_artist it must sort every
// junction row on every page (temp b-tree over the whole table). Keeping artist as the outer
// table streams rows in artist.id order from its primary key index, so LIMIT/OFFSET
// short-circuits. Search-only: other artist queries keep the planner's freedom.
func (r *artistRepository) applyLibraryFilterToSearchQuery(query SelectBuilder) SelectBuilder {
user := loggedUser(r.ctx)
query = query.CrossJoin("library_artist on library_artist.artist_id = artist.id")
if user.ID != invalidUserId && !user.IsAdmin {
query = query.Join("user_library on user_library.library_id = library_artist.library_id AND user_library.user_id = ?", user.ID)
}
return query
}
func (r *artistRepository) searchCfg() searchConfig {
return searchConfig{
// Natural order for artists is more performant by ID, due to GROUP BY clause in selectArtist
NaturalOrder: "artist.id",
OrderBy: []string{"sum(json_extract(stats, '$.total.m')) desc", "name"},
MBIDFields: []string{"mbz_artist_id"},
LibraryFilter: r.applyLibraryFilterToArtistQuery,
LibraryFilter: r.applyLibraryFilterToSearchQuery,
}
}
+61
View File
@@ -3,6 +3,7 @@ package persistence
import (
"context"
"encoding/json"
"fmt"
"os"
"path/filepath"
@@ -594,6 +595,66 @@ var _ = Describe("ArtistRepository", func() {
})
})
Context("Empty Query (sync pagination)", func() {
It("does not duplicate artists that belong to multiple libraries", func() {
// An artist in two libraries has two library_artist rows; pagination
// must still enumerate it exactly once, at a stable offset.
Expect(lr.AddArtist(lib2.ID, artistBeatles.ID)).To(Succeed())
all, err := repo.Search("", model.QueryOptions{Max: 1000})
Expect(err).ToNot(HaveOccurred())
seen := map[string]bool{}
var paged model.Artists
for offset := range len(all) {
page, err := repo.Search("", model.QueryOptions{Max: 1, Offset: offset})
Expect(err).ToNot(HaveOccurred())
for _, a := range page {
Expect(seen[a.ID]).To(BeFalse(), fmt.Sprintf("artist %s returned twice", a.ID))
seen[a.ID] = true
}
paged = append(paged, page...)
}
Expect(paged).To(HaveLen(len(all)))
})
It("paginates all artists in natural order without overlaps or gaps", func() {
all, err := repo.Search("", model.QueryOptions{Max: 1000})
Expect(err).ToNot(HaveOccurred())
Expect(len(all)).To(BeNumerically(">", 1))
var paged model.Artists
pageSize := 2
for offset := 0; offset < len(all); offset += pageSize {
page, err := repo.Search("", model.QueryOptions{Max: pageSize, Offset: offset})
Expect(err).ToNot(HaveOccurred())
paged = append(paged, page...)
}
Expect(paged).To(HaveLen(len(all)))
for i := range all {
Expect(paged[i].ID).To(Equal(all[i].ID))
}
})
It("respects library filtering for restricted users", func() {
// Create an artist only in library 2 (not accessible to restricted user)
lib2Artist := model.Artist{ID: "empty-query-lib2-artist", Name: "Empty Query Lib2 Artist"}
Expect(repo.Put(&lib2Artist)).To(Succeed())
Expect(lr.AddArtist(lib2.ID, lib2Artist.ID)).To(Succeed())
results, err := restrictedRepo.Search("", model.QueryOptions{Max: 1000})
Expect(err).ToNot(HaveOccurred())
for _, a := range results {
Expect(a.ID).ToNot(Equal(lib2Artist.ID), "Empty query search should respect library filtering")
}
// Clean up
if raw, ok := repo.(*artistRepository); ok {
_, _ = raw.executeSQL(squirrel.Delete(raw.tableName).Where(squirrel.Eq{"id": lib2Artist.ID}))
}
})
})
Context("Headless Processes (No User Context)", func() {
It("should see all artists from all libraries when no user is in context", func() {
// Add artists to different libraries
+43
View File
@@ -652,6 +652,49 @@ var _ = Describe("MediaRepository", func() {
_, _ = raw.executeSQL(squirrel.Delete(raw.tableName).Where(squirrel.Eq{"id": missingMediaFile.ID}))
})
})
Context("empty query (natural order pagination)", func() {
It("returns all non-missing files in natural order", func() {
results, err := mr.Search("", model.QueryOptions{Max: 1000})
Expect(err).ToNot(HaveOccurred())
Expect(results).ToNot(BeEmpty())
for _, result := range results {
Expect(result.Missing).To(BeFalse())
}
})
It(`treats quoted empty query ("") the same as empty`, func() {
all, err := mr.Search("", model.QueryOptions{Max: 1000})
Expect(err).ToNot(HaveOccurred())
quoted, err := mr.Search(`""`, model.QueryOptions{Max: 1000})
Expect(err).ToNot(HaveOccurred())
Expect(quoted).To(HaveLen(len(all)))
})
It("paginates without overlaps or gaps", func() {
all, err := mr.Search("", model.QueryOptions{Max: 1000})
Expect(err).ToNot(HaveOccurred())
Expect(len(all)).To(BeNumerically(">", 3))
var paged model.MediaFiles
pageSize := 3
for offset := 0; offset < len(all); offset += pageSize {
page, err := mr.Search("", model.QueryOptions{Max: pageSize, Offset: offset})
Expect(err).ToNot(HaveOccurred())
paged = append(paged, page...)
}
Expect(paged).To(HaveLen(len(all)))
for i := range all {
Expect(paged[i].ID).To(Equal(all[i].ID), fmt.Sprintf("row %d differs", i))
}
})
It("returns empty page when offset is beyond the total", func() {
results, err := mr.Search("", model.QueryOptions{Max: 10, Offset: 100000})
Expect(err).ToNot(HaveOccurred())
Expect(results).To(BeEmpty())
})
})
})
Describe("FindByPaths", func() {
+54 -4
View File
@@ -1,6 +1,7 @@
package persistence
import (
"fmt"
"strings"
. "github.com/Masterminds/squirrel"
@@ -20,8 +21,10 @@ type searchConfig struct {
NaturalOrder string // ORDER BY for empty-query results (e.g. "album.rowid")
OrderBy []string // ORDER BY for text search results (e.g. ["name"])
MBIDFields []string // columns to match when query is a UUID
// LibraryFilter overrides the default applyLibraryFilter for FTS Phase 1.
// Needed when library access requires a junction table (e.g. artist → library_artist).
// LibraryFilter overrides the default applyLibraryFilter for the rowid Phase 1 of
// two-phase searches (FTS and empty-query). Needed when library access goes through a
// junction table (e.g. artist → library_artist), whose JOIN can fan out rowids for
// entities in multiple libraries — Phase 1 dedups whenever this is set.
LibraryFilter func(sq SelectBuilder) SelectBuilder
}
@@ -57,8 +60,8 @@ func (r sqlRepository) doSearch(sq SelectBuilder, q string, results any, cfg sea
// Empty query (OpenSubsonic `search3?query=""`) — return all in natural order.
if q == "" || q == `""` {
sq = sq.OrderBy(cfg.NaturalOrder)
return r.queryAll(sq, results, options)
rowidCore := Select(r.tableName + ".rowid").From(r.tableName).OrderBy(cfg.NaturalOrder)
return r.executeTwoPhase(sq, results, rowidCore, cfg, options)
}
// MBID search: if query is a valid UUID, search by MBID fields instead
@@ -82,6 +85,53 @@ func (r sqlRepository) doSearch(sq SelectBuilder, q string, results any, cfg sea
return strategy.execute(r, sq, results, cfg, options)
}
// executeTwoPhase runs a search in two phases:
// - Phase 1: rowidCore (strategy-specific FROM/JOINs and ORDER BY) plus the shared search
// contract applied here — non-missing rows only, library access, options.Filters, and
// pagination. Keeping Phase 1 free of the full SELECT's JOINs lets SQLite paginate via a
// covering index; with those JOINs, large offsets degrade to O(offset) join probes —
// multi-second responses on 100k+ libraries.
// - Phase 2: full SELECT with all JOINs, scoped to Phase 1's rowid page.
func (r sqlRepository) executeTwoPhase(sq SelectBuilder, results any, rowidCore SelectBuilder, cfg searchConfig, options model.QueryOptions) error {
rowidQuery := rowidCore.
Where(Eq{r.tableName + ".missing": false})
if options.Max > 0 {
rowidQuery = rowidQuery.Limit(uint64(options.Max))
}
if options.Offset > 0 {
rowidQuery = rowidQuery.Offset(uint64(options.Offset))
}
if cfg.LibraryFilter != nil {
// Junction-table library filters can repeat rowids for entities in multiple
// libraries, which would corrupt offset-based pagination — dedup before paginating.
// (DISTINCT, not GROUP BY: bm25() can't be evaluated in a grouped query.)
rowidQuery = cfg.LibraryFilter(rowidQuery).Distinct()
} else {
rowidQuery = r.applyLibraryFilter(rowidQuery)
}
if options.Filters != nil {
rowidQuery = rowidQuery.Where(options.Filters)
}
return r.hydrateRowidPage(sq, rowidQuery, results)
}
// hydrateRowidPage joins sq to the ordered rowid set produced by rowidQuery, preserving its
// ordering. rowidQuery must handle pagination itself; sq's LIMIT/OFFSET are stripped.
func (r sqlRepository) hydrateRowidPage(sq SelectBuilder, rowidQuery SelectBuilder, results any) error {
rowidSQL, rowidArgs, err := rowidQuery.ToSql()
if err != nil {
return fmt.Errorf("building rowid query: %w", err)
}
sq = sq.RemoveLimit().RemoveOffset()
rankedSubquery := fmt.Sprintf(
"(SELECT rowid as _rid, row_number() OVER () AS _rn FROM (%s)) AS _ranked",
rowidSQL,
)
sq = sq.Join(rankedSubquery+" ON "+r.tableName+".rowid = _ranked._rid", rowidArgs...)
sq = sq.OrderBy("_ranked._rn")
return r.queryAll(sq, results)
}
func mbidExpr(tableName, mbid string, mbidFields ...string) Sqlizer {
if uuid.Validate(mbid) != nil || len(mbidFields) == 0 {
return nil
+5 -41
View File
@@ -284,11 +284,9 @@ func (s *ftsSearch) ToSql() (string, []any, error) {
return sql, []any{s.matchExpr}, nil
}
// execute runs a two-phase FTS5 search:
// - Phase 1: lightweight rowid query (main table + FTS + library filter) for ranking and pagination.
// - Phase 2: full SELECT with all JOINs, scoped to Phase 1's rowid set.
//
// Complex ORDER BY (function calls, aggregations) are dropped from Phase 1.
// execute runs a two-phase FTS5 search (see executeTwoPhase): Phase 1 here contributes the
// FTS MATCH join and BM25 rank ordering. Complex ORDER BY (function calls, aggregations) are
// dropped from Phase 1.
func (s *ftsSearch) execute(r sqlRepository, sq SelectBuilder, dest any, cfg searchConfig, options model.QueryOptions) error {
qualifiedOrderBys := []string{s.rankExpr}
for _, ob := range cfg.OrderBy {
@@ -297,45 +295,11 @@ func (s *ftsSearch) execute(r sqlRepository, sq SelectBuilder, dest any, cfg sea
}
}
// Phase 1: fresh query — must set LIMIT/OFFSET from options explicitly.
// Mirror applyOptions behavior: Max=0 means no limit, not LIMIT 0.
rowidQuery := Select(s.tableName+".rowid").
rowidCore := Select(s.tableName+".rowid").
From(s.tableName).
Join(s.ftsTable+" ON "+s.ftsTable+".rowid = "+s.tableName+".rowid AND "+s.ftsTable+" MATCH ?", s.matchExpr).
Where(Eq{s.tableName + ".missing": false}).
OrderBy(qualifiedOrderBys...)
if options.Max > 0 {
rowidQuery = rowidQuery.Limit(uint64(options.Max))
}
if options.Offset > 0 {
rowidQuery = rowidQuery.Offset(uint64(options.Offset))
}
// Library filter + musicFolderId must be applied here, before pagination.
if cfg.LibraryFilter != nil {
rowidQuery = cfg.LibraryFilter(rowidQuery)
} else {
rowidQuery = r.applyLibraryFilter(rowidQuery)
}
if options.Filters != nil {
rowidQuery = rowidQuery.Where(options.Filters)
}
rowidSQL, rowidArgs, err := rowidQuery.ToSql()
if err != nil {
return fmt.Errorf("building FTS rowid query: %w", err)
}
// Phase 2: strip LIMIT/OFFSET from sq (Phase 1 handled pagination),
// join on the ranked rowid set to hydrate with full columns.
sq = sq.RemoveLimit().RemoveOffset()
rankedSubquery := fmt.Sprintf(
"(SELECT rowid as _rid, row_number() OVER () AS _rn FROM (%s)) AS _ranked",
rowidSQL,
)
sq = sq.Join(rankedSubquery+" ON "+s.tableName+".rowid = _ranked._rid", rowidArgs...)
sq = sq.OrderBy("_ranked._rn")
return r.queryAll(sq, dest)
return r.executeTwoPhase(sq, dest, rowidCore, cfg, options)
}
// qualifyOrderBy prepends tableName to a simple column name. Returns empty string for