feat(metadata): add computeArtistPID with name-attr normalization

This commit is contained in:
Deluan
2026-05-24 19:01:37 -03:00
parent 273c8d23f1
commit 3358a8bc91
2 changed files with 148 additions and 3 deletions
+57 -3
View File
@@ -87,10 +87,64 @@ func (md Metadata) albumID(mf model.MediaFile, pidConf string) string {
return computePID(mf, md, pidConf, true, id.NewHash)
}
// BFR Must be configurable?
// computeArtistPID computes a persistent ID for a single participant using the
// given spec. The spec grammar is the same pipe/comma form used by Album/Track
// PIDs. Attributes recognised:
// - name → hash(clear(lower(participant.Name))) — the inner
// hash matches the historical 'albumartistid' path
// so the default spec "name" produces byte-identical
// IDs to the previous hardcoded artistID().
// - musicbrainz_artistid → participant.MbzArtistID (opaque identifier — no
// normalization).
// - sort_name → participant.SortArtistName (raw — sort forms are
// already canonicalized by taggers, and folding case
// could collapse legitimately distinct sort variants).
//
// Unlike album/track PIDs, no library prefix is applied: artists are shared
// across libraries by design.
func computeArtistPID(p model.Participant, spec string, hash hashFunc) string {
pid := ""
fields := strings.SplitSeq(spec, "|")
for field := range fields {
attributes := strings.Split(field, ",")
values := make([]string, len(attributes))
hasValue := false
for i, attr := range attributes {
v := getArtistPIDAttr(p, attr, hash)
if v != "" {
hasValue = true
}
values[i] = v
}
if hasValue {
pid += strings.Join(values, "\\")
break
}
}
return hash(pid)
}
func getArtistPIDAttr(p model.Participant, attr string, hash hashFunc) string {
switch strings.TrimSpace(strings.ToLower(attr)) {
case "name":
if p.Name == "" {
return ""
}
return hash(str.Clear(strings.ToLower(p.Name)))
case "musicbrainz_artistid":
return p.MbzArtistID
case "sort_name":
return p.SortArtistName
}
return ""
}
// artistID is kept temporarily as a thin wrapper so any in-progress callers
// continue to compile; it will be removed once map_participants.go switches
// to computeArtistPID directly (Task 4).
func (md Metadata) artistID(name string) string {
mf := model.MediaFile{AlbumArtist: name}
return computePID(mf, md, "albumartistid", false, id.NewHash)
return computeArtistPID(model.Participant{Artist: model.Artist{Name: name}},
conf.Server.PID.Artist, id.NewHash)
}
func (md Metadata) mapTrackTitle() string {
+91
View File
@@ -5,7 +5,9 @@ import (
"github.com/navidrome/navidrome/conf"
"github.com/navidrome/navidrome/conf/configtest"
"github.com/navidrome/navidrome/consts"
"github.com/navidrome/navidrome/model"
"github.com/navidrome/navidrome/model/id"
"github.com/navidrome/navidrome/tests"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
@@ -291,3 +293,92 @@ var _ = Describe("getPID", func() {
})
})
})
var _ = Describe("computeArtistPID", func() {
var p model.Participant
BeforeEach(func() {
DeferCleanup(configtest.SetupConfig())
p = model.Participant{Artist: model.Artist{Name: "The Beatles"}}
})
Context("default spec 'name'", func() {
BeforeEach(func() { conf.Server.PID.Artist = consts.DefaultArtistPID })
It("produces the same ID as the legacy artistID(name) hardcoded function", func() {
// Legacy formula: hash(hash(clear(lower(name))))
// It came from artistID(name) routing through computePID(spec="albumartistid"),
// which inside getPIDAttr returns hash(clear(lower(mf.AlbumArtist))), then
// computePID hashes that again.
mfWithAlbumArtist := model.MediaFile{AlbumArtist: p.Name}
legacyMD := Metadata{}
legacyID := computePID(mfWithAlbumArtist, legacyMD, "albumartistid", false, id.NewHash)
newID := computeArtistPID(p, conf.Server.PID.Artist, id.NewHash)
Expect(newID).To(Equal(legacyID))
})
It("normalizes name (clear(lower(...))) before hashing", func() {
// Different casing / punctuation must collapse to the same ID.
p2 := model.Participant{Artist: model.Artist{Name: "the beatles"}}
Expect(computeArtistPID(p, conf.Server.PID.Artist, id.NewHash)).
To(Equal(computeArtistPID(p2, conf.Server.PID.Artist, id.NewHash)))
})
})
Context("spec 'musicbrainz_artistid|name'", func() {
BeforeEach(func() { conf.Server.PID.Artist = "musicbrainz_artistid|name" })
It("uses MBID when present", func() {
p.MbzArtistID = "mbid-1"
withMBID := computeArtistPID(p, conf.Server.PID.Artist, id.NewHash)
p2 := model.Participant{Artist: model.Artist{Name: "Different", MbzArtistID: "mbid-1"}}
withMBIDOtherName := computeArtistPID(p2, conf.Server.PID.Artist, id.NewHash)
Expect(withMBID).To(Equal(withMBIDOtherName))
})
It("falls back to normalized name when MBID is missing", func() {
withoutMBID := computeArtistPID(p, conf.Server.PID.Artist, id.NewHash)
byName := computeArtistPID(p, "name", id.NewHash)
Expect(withoutMBID).To(Equal(byName))
})
})
Context("composite spec 'musicbrainz_artistid|sort_name,name'", func() {
BeforeEach(func() { conf.Server.PID.Artist = "musicbrainz_artistid|sort_name,name" })
It("hashes sort+name composite when MBID is absent", func() {
p.SortArtistName = "Beatles, The"
withSort := computeArtistPID(p, conf.Server.PID.Artist, id.NewHash)
p2 := p
p2.SortArtistName = "Different Sort"
withDifferentSort := computeArtistPID(p2, conf.Server.PID.Artist, id.NewHash)
Expect(withSort).NotTo(Equal(withDifferentSort))
})
})
Context("spec 'sort_name|name' with empty sort_name", func() {
BeforeEach(func() { conf.Server.PID.Artist = "sort_name|name" })
It("falls through to name when SortArtistName is empty", func() {
// sort_name is empty → first pipe alternative produces no value →
// engine falls through to name.
fallback := computeArtistPID(p, conf.Server.PID.Artist, id.NewHash)
byName := computeArtistPID(p, "name", id.NewHash)
Expect(fallback).To(Equal(byName))
})
})
Context("empty inputs", func() {
BeforeEach(func() { conf.Server.PID.Artist = "musicbrainz_artistid|name" })
It("does not panic on empty participant", func() {
empty := model.Participant{Artist: model.Artist{Name: ""}}
Expect(func() { computeArtistPID(empty, conf.Server.PID.Artist, id.NewHash) }).
NotTo(Panic())
})
})
})