perf(subsonic): speed up getRandomSongs with two-phase random-rowid selection (#5618)

getRandomSongs used a single ORDER BY random() over the media_file table. At large
library sizes that forces SQLite to scan the wide table and sort every matching row
before applying the limit — roughly 4 seconds for a 1M-track library, regardless of how
many songs are requested.

Add MediaFileRepository.GetRandom, which does this in two passes: first select N random
rowids over a narrow index (filters + library scope only, no wide columns or joins), so
the random sort runs over compact, index-friendly data; then hydrate just those rows with
the full select. The wide media_file row is never part of the sort. End-to-end this drops
getRandomSongs from ~4s to ~0.3s on a 1M-track library, and the cost no longer grows with
the requested size.

The handler now calls GetRandom directly. The filter helper that builds the genre/year
filters is renamed SongsByRandom -> SongsByGenreAndYearRange to reflect what it does, since
the random ordering is now owned by GetRandom rather than a Sort option. Filters (genre,
year, library) compose into the first pass unchanged. The album random list path is left
as-is (far fewer rows, already fast).
This commit is contained in:
Deluan Quintão
2026-06-15 16:23:39 -04:00
committed by GitHub
parent 08a027dbcc
commit f0625ff709
6 changed files with 150 additions and 6 deletions
+3
View File
@@ -439,6 +439,9 @@ type MediaFileRepository interface {
Get(id string) (*MediaFile, error)
GetWithParticipants(id string) (*MediaFile, error)
GetAll(options ...QueryOptions) (MediaFiles, error)
// GetRandom returns up to options.Max media files in random order, applying the same
// filters as GetAll. Sort/Order are ignored.
GetRandom(options ...QueryOptions) (MediaFiles, error)
GetAllByTags(tag TagName, values []string, options ...QueryOptions) (MediaFiles, error)
GetCursor(options ...QueryOptions) (MediaFileCursor, error)
Delete(id string) error
+34
View File
@@ -207,6 +207,40 @@ func (r *mediaFileRepository) GetAll(options ...model.QueryOptions) (model.Media
return res.toModels(), nil
}
// GetRandom uses two passes so the random sort runs over a narrow rowid index instead of the
// wide media_file row: pick random rowids first, then hydrate only those.
func (r *mediaFileRepository) GetRandom(options ...model.QueryOptions) (model.MediaFiles, error) {
var opt model.QueryOptions
if len(options) > 0 {
opt = options[0]
}
rowidQuery := Select("media_file.rowid").From(r.tableName)
rowidQuery = r.applyFilters(rowidQuery, model.QueryOptions{Filters: opt.Filters})
rowidQuery = r.applyLibraryFilter(rowidQuery)
rowidQuery = rowidQuery.OrderBy("random()")
if opt.Max > 0 {
rowidQuery = rowidQuery.Limit(uint64(opt.Max))
}
var rowids []int64
if err := r.queryAllSlice(rowidQuery, &rowids); err != nil {
return nil, err
}
if len(rowids) == 0 {
return model.MediaFiles{}, nil
}
// Re-shuffle in Phase 2: `WHERE rowid IN (...)` returns rows in ascending rowid order, not
// the random order from Phase 1. Sorting only the (<=Max) hydrated rows is negligible.
sq := r.selectMediaFile().Where(Eq{"media_file.rowid": rowids}).OrderBy("random()")
var res dbMediaFiles
if err := r.queryAll(sq, &res); err != nil {
return nil, err
}
return res.toModels(), nil
}
func (r *mediaFileRepository) GetAllByTags(tag model.TagName, values []string, options ...model.QueryOptions) (model.MediaFiles, error) {
placeholders := make([]string, len(values))
args := make([]any, len(values))
+97
View File
@@ -4,6 +4,7 @@ import (
"context"
"errors"
"fmt"
"reflect"
"time"
"github.com/Masterminds/squirrel"
@@ -106,6 +107,102 @@ var _ = Describe("MediaRepository", func() {
}
})
Describe("GetRandom", func() {
It("returns the requested number of distinct, fully-hydrated media files", func() {
results, err := mr.GetRandom(model.QueryOptions{Max: 5})
Expect(err).ToNot(HaveOccurred())
Expect(results).To(HaveLen(5))
// Each returned row must match its GetAll counterpart exactly — proves Phase 2
// hydrates full rows (not bare rowids) — and ids must be distinct.
byID := map[string]model.MediaFile{}
all, err := mr.GetAll()
Expect(err).ToNot(HaveOccurred())
for _, mf := range all {
byID[mf.ID] = mf
}
seen := map[string]bool{}
for _, mf := range results {
expected, ok := byID[mf.ID]
Expect(ok).To(BeTrue(), "returned id must be a real media file")
Expect(mf.Title).To(Equal(expected.Title), "row must be fully hydrated")
Expect(seen[mf.ID]).To(BeFalse(), "no duplicate rows")
seen[mf.ID] = true
}
})
It("returns all matching files when Max exceeds the total", func() {
results, err := mr.GetRandom(model.QueryOptions{Max: 1000})
Expect(err).ToNot(HaveOccurred())
Expect(results).To(HaveLen(13))
})
It("honors filters", func() {
results, err := mr.GetRandom(model.QueryOptions{
Max: 10,
Filters: squirrel.Eq{"media_file.title": "Antenna"},
})
Expect(err).ToNot(HaveOccurred())
Expect(results).ToNot(BeEmpty())
for _, mf := range results {
Expect(mf.Title).To(Equal("Antenna"))
}
})
It("returns varying results across calls", func() {
// Retry a few times: two random draws of 5 from 13 rows differ with near-certainty.
first, err := mr.GetRandom(model.QueryOptions{Max: 5})
Expect(err).ToNot(HaveOccurred())
firstIDs := func() []string {
ids := make([]string, len(first))
for i, mf := range first {
ids[i] = mf.ID
}
return ids
}()
differed := false
for range 10 {
next, err := mr.GetRandom(model.QueryOptions{Max: 5})
Expect(err).ToNot(HaveOccurred())
nextIDs := make([]string, len(next))
for i, mf := range next {
nextIDs[i] = mf.ID
}
if !reflect.DeepEqual(firstIDs, nextIDs) {
differed = true
break
}
}
Expect(differed).To(BeTrue(), "GetRandom should not return an identical set every call")
})
It("randomizes order even when Max exceeds the total", func() {
// Same set of rows every time (all 13), but the order must still be shuffled —
// guards against Phase 2's `rowid IN (...)` returning rows in rowid order.
first, err := mr.GetRandom(model.QueryOptions{Max: 100})
Expect(err).ToNot(HaveOccurred())
Expect(first).To(HaveLen(13))
firstIDs := make([]string, len(first))
for i, mf := range first {
firstIDs[i] = mf.ID
}
differed := false
for range 10 {
next, err := mr.GetRandom(model.QueryOptions{Max: 100})
Expect(err).ToNot(HaveOccurred())
nextIDs := make([]string, len(next))
for i, mf := range next {
nextIDs[i] = mf.ID
}
if !reflect.DeepEqual(firstIDs, nextIDs) {
differed = true
break
}
}
Expect(differed).To(BeTrue(), "order must vary even when returning all rows")
})
})
Describe("Put CreatedAt behavior (#5050)", func() {
It("sets CreatedAt to now when inserting a new file with zero CreatedAt", func() {
before := time.Now().Add(-time.Second)
+3 -2
View File
@@ -240,10 +240,11 @@ func (api *Router) GetRandomSongs(r *http.Request) (*responses.Subsonic, error)
if err != nil {
return nil, err
}
opts := filter.SongsByRandom(genre, fromYear, toYear)
opts := filter.SongsByGenreAndYearRange(genre, fromYear, toYear)
opts = filter.ApplyLibraryFilter(opts, musicFolderIds)
opts.Max = size
songs, err := api.getSongs(r.Context(), 0, size, opts)
songs, err := api.ds.MediaFile(r.Context()).GetRandom(opts)
if err != nil {
log.Error(r, "Error retrieving random songs", err)
return nil, err
+2 -4
View File
@@ -90,10 +90,8 @@ func SongsByAlbum(albumId string) Options {
})
}
func SongsByRandom(genre string, fromYear, toYear int) Options {
options := Options{
Sort: "random()",
}
func SongsByGenreAndYearRange(genre string, fromYear, toYear int) Options {
options := Options{}
ff := And{}
if genre != "" {
ff = append(ff, filterByGenre(genre))
+11
View File
@@ -98,6 +98,17 @@ func (m *MockMediaFileRepo) GetAll(qo ...model.QueryOptions) (model.MediaFiles,
return result, nil
}
func (m *MockMediaFileRepo) GetRandom(qo ...model.QueryOptions) (model.MediaFiles, error) {
res, err := m.GetAll(qo...)
if err != nil {
return nil, err
}
if len(qo) > 0 && qo[0].Max > 0 && len(res) > qo[0].Max {
res = res[:qo[0].Max]
}
return res, nil
}
func (m *MockMediaFileRepo) Put(mf *model.MediaFile) error {
if m.Err {
return errors.New("error")