Skip to content

Commit b977c25

Browse files
committed
[WIP] Add react state version of guessFeat
1 parent cda4828 commit b977c25

File tree

5 files changed

+332
-2
lines changed

5 files changed

+332
-2
lines changed

root/static/scripts/common/components/Autocomplete2/reducer.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,8 @@ export function generateItems<T: EntityItemT>(
7373
state: StateT<T>,
7474
): $ReadOnlyArray<ItemT<T>> {
7575
const items: Array<ItemT<T>> = [];
76+
console.log('state is:');
77+
console.log(state);
7678

7779
if (state.error) {
7880
switch (state.error) {

root/static/scripts/edit/components/FormRowNameWithGuessCase.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ component FormRowNameWithGuessCase(
9494
field: FieldT<string | null>,
9595
guessCaseOptions: GuessCaseOptionsStateT,
9696
guessFeat: boolean = false,
97+
handleGuessFeat?: (event: SyntheticEvent<HTMLButtonElement>) => void,
9798
isGuessCaseOptionsOpen: boolean = false,
9899
label: React.Node = addColonText(l('Name')),
99100
) {
@@ -176,6 +177,7 @@ component FormRowNameWithGuessCase(
176177
{guessFeat ? (
177178
<button
178179
className="guessfeat icon"
180+
onClick={handleGuessFeat}
179181
title={l('Guess feat. artists')}
180182
type="button"
181183
/>
Lines changed: 294 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,294 @@
1+
/*
2+
* @flow
3+
* Copyright (C) 2025 MetaBrainz Foundation
4+
*
5+
* This file is part of MusicBrainz, the open internet music database,
6+
* and is licensed under the GPL version 2, or (at your option) any
7+
* later version: http://www.gnu.org/licenses/gpl-2.0.txt
8+
*/
9+
10+
import balanced from 'balanced-match';
11+
12+
import {
13+
BRACKET_PAIRS,
14+
MIN_NAME_SIMILARITY,
15+
} from '../../common/constants.js';
16+
import {last} from '../../common/utility/arrays.js';
17+
import clean from '../../common/utility/clean.js';
18+
import {cloneArrayDeep} from '../../common/utility/cloneDeep.mjs';
19+
20+
import {
21+
fromFullwidthLatin,
22+
hasFullwidthLatin,
23+
toFullwidthLatin,
24+
} from './fullwidthLatin.js';
25+
import getRelatedArtists from './getRelatedArtists.js';
26+
import isEntityProbablyClassical from './isEntityProbablyClassical.js';
27+
import getSimilarity from './similarity.js';
28+
29+
type GuessFeatEntityT = {
30+
+artistCredit: ArtistCreditT,
31+
+name: string,
32+
+relationships?: $ReadOnlyArray<RelationshipT>,
33+
};
34+
35+
type ExpandedArtistCreditT = {
36+
artist: ArtistT,
37+
joinPhrase: string,
38+
name: string,
39+
similarity: number,
40+
};
41+
42+
type ExtractedCreditsT = {
43+
+artistCredit: Array<ExpandedArtistCreditT>,
44+
+joinPhrase: string,
45+
+name: string,
46+
};
47+
48+
/* eslint-disable sort-keys */
49+
export const featRegex: RegExp = /(?:^\s*|[,-]\s*|\s+)((?:ft|feat||)(?:[.]|(?=\s))|(?:featuring|)(?=\s))\s*/i;
50+
/*
51+
* `featQuickTestRegex` is used to quickly test whether a title *might*
52+
* contain featured artists. It's fine if it returns false-positives.
53+
* Please keep it in sync with `featRegex` above.
54+
*/
55+
const featQuickTestRegex = /ft|feat||/i;
56+
const collabRegex = /([,]?\s+(?:&|and|et|||)\s+||[,;]\s+|\s*[/]\s*|\s+(?:vs|)[.]\s+)/i;
57+
58+
function extractNonBracketedFeatCredits(
59+
str: string,
60+
artists: Array<ArtistT>,
61+
isProbablyClassical: boolean,
62+
): ExtractedCreditsT {
63+
const parts = str.split(featRegex).map(clean);
64+
65+
function fixFeatJoinPhrase(existing: string) {
66+
const joinPhrase = isProbablyClassical ? '; ' : existing ? (
67+
' ' +
68+
fromFullwidthLatin(existing)
69+
.toLowerCase()
70+
.replace(/^feat$/i, '$&.') +
71+
' '
72+
) : ' feat. ';
73+
74+
return hasFullwidthLatin(existing)
75+
? toFullwidthLatin(joinPhrase)
76+
: joinPhrase;
77+
}
78+
79+
const name = clean(parts[0]);
80+
81+
const joinPhrase = (parts.length < 2)
82+
? ''
83+
: fixFeatJoinPhrase(parts[1]);
84+
85+
const artistCredit = parts
86+
.splice(2)
87+
.filter((value, key) => value && key % 2 === 0)
88+
.flatMap(c => expandCredit(c, artists, isProbablyClassical));
89+
90+
return {
91+
name,
92+
joinPhrase,
93+
artistCredit,
94+
};
95+
}
96+
97+
function extractBracketedFeatCredits(
98+
str: string,
99+
artists: Array<ArtistT>,
100+
isProbablyClassical: boolean,
101+
): ExtractedCreditsT {
102+
return BRACKET_PAIRS.reduce(function (accum, pair) {
103+
let name = '';
104+
let joinPhrase = accum.joinPhrase;
105+
let credits = accum.artistCredit;
106+
let remainder = accum.name;
107+
let b;
108+
let m;
109+
110+
while (true) {
111+
b = balanced(pair[0], pair[1], remainder);
112+
if (b) {
113+
m = extractFeatCredits(b.body, artists, isProbablyClassical, true);
114+
name += b.pre;
115+
116+
if (m.name) {
117+
/*
118+
* Check if the remaining text in the brackets
119+
* is also an artist name.
120+
*/
121+
const expandedCredits = expandCredit(
122+
m.name, artists, isProbablyClassical,
123+
);
124+
125+
if (expandedCredits.some(
126+
c => c.similarity >= MIN_NAME_SIMILARITY,
127+
)) {
128+
credits = credits.concat(expandedCredits);
129+
} else {
130+
name += pair[0] + m.name + pair[1];
131+
}
132+
}
133+
134+
joinPhrase ||= m.joinPhrase;
135+
credits = credits.concat(m.artistCredit);
136+
remainder = b.post;
137+
} else {
138+
name += remainder;
139+
break;
140+
}
141+
}
142+
143+
return {name: clean(name), joinPhrase, artistCredit: credits};
144+
}, {name: str, joinPhrase: '', artistCredit: []});
145+
}
146+
147+
export function extractFeatCredits(
148+
name: string,
149+
artists: Array<ArtistT>,
150+
isProbablyClassical: boolean,
151+
allowEmptyName: boolean,
152+
): ExtractedCreditsT {
153+
if (!featQuickTestRegex.test(name)) {
154+
return {name, joinPhrase: '', artistCredit: []};
155+
}
156+
157+
const m1 = extractBracketedFeatCredits(name, artists, isProbablyClassical);
158+
159+
if (!m1.name && !allowEmptyName) {
160+
return {name, joinPhrase: '', artistCredit: []};
161+
}
162+
163+
const m2 = extractNonBracketedFeatCredits(
164+
m1.name, artists, isProbablyClassical,
165+
);
166+
167+
if (!m2.name && !allowEmptyName) {
168+
return m1;
169+
}
170+
171+
return {
172+
name: m2.name,
173+
joinPhrase: m2.joinPhrase || m1.joinPhrase,
174+
artistCredit: m2.artistCredit.concat(m1.artistCredit),
175+
};
176+
}
177+
178+
function cleanCredit(name: string, isProbablyClassical: boolean) {
179+
// remove classical roles
180+
return isProbablyClassical ? name.replace(/^[a-z]+: (.+)$/, '$1') : name;
181+
}
182+
183+
function bestArtistMatch(artists: Array<ArtistT> | null, name: string) {
184+
if (!artists) {
185+
return null;
186+
}
187+
let match = null;
188+
for (const artist of artists) {
189+
const similarity = getSimilarity(name, artist.name);
190+
if (
191+
similarity >= MIN_NAME_SIMILARITY &&
192+
(match == null || similarity > match.similarity)
193+
) {
194+
match = {similarity, artist, name, joinPhrase: ''};
195+
}
196+
}
197+
return match;
198+
}
199+
200+
function expandCredit(
201+
fullName: string,
202+
artists: Array<ArtistT>,
203+
isProbablyClassical: boolean,
204+
): Array<ExpandedArtistCreditT> {
205+
const cleanedFullName = cleanCredit(fullName, isProbablyClassical);
206+
207+
/*
208+
* See which produces a better match to an existing artist: the full
209+
* credit, or the individual credits as split by collabRegex. Some artist
210+
* names legitimately contain characters in collabRegex, so this stops
211+
* those from getting split (assuming the artist appears in a relationship
212+
* or artist credit).
213+
*/
214+
const bestFullMatch = bestArtistMatch(artists, cleanedFullName);
215+
216+
function fixJoinPhrase(existing: string) {
217+
const joinPhrase = isProbablyClassical ? ', ' : (existing || ' & ');
218+
219+
return hasFullwidthLatin(existing)
220+
? toFullwidthLatin(joinPhrase)
221+
: joinPhrase;
222+
}
223+
224+
const splitParts = cleanedFullName.split(collabRegex);
225+
const splitMatches = [];
226+
let bestSplitMatch: ExpandedArtistCreditT;
227+
228+
for (let i = 0; i < splitParts.length; i += 2) {
229+
const name = cleanCredit(splitParts[i], isProbablyClassical);
230+
const match = {
231+
similarity: -1,
232+
artist: null,
233+
name,
234+
joinPhrase: fixJoinPhrase(splitParts[i + 1]),
235+
...bestArtistMatch(artists, name),
236+
};
237+
splitMatches.push(match);
238+
if (!bestSplitMatch || match.similarity > bestSplitMatch.similarity) {
239+
bestSplitMatch = match;
240+
}
241+
}
242+
243+
if (bestFullMatch && bestSplitMatch &&
244+
bestFullMatch.similarity > bestSplitMatch.similarity) {
245+
bestFullMatch.joinPhrase = fixJoinPhrase('');
246+
return [bestFullMatch];
247+
}
248+
249+
return splitMatches;
250+
}
251+
252+
export default function guessFeat(
253+
entity: GuessFeatEntityT,
254+
): {artistCreditNames: Array<ArtistCreditNameT>, name: string} | null {
255+
const name = entity.name;
256+
console.log(entity);
257+
console.log(name);
258+
259+
if (empty(name)) {
260+
// Nothing to guess from an empty name
261+
return null;
262+
}
263+
console.log('still here');
264+
265+
const relatedArtists = getRelatedArtists(entity.relationships);
266+
267+
const isProbablyClassical = isEntityProbablyClassical(entity);
268+
269+
const match = extractFeatCredits(
270+
name, relatedArtists, isProbablyClassical, false,
271+
);
272+
273+
if (!match.name || !match.artistCredit.length) {
274+
return null;
275+
}
276+
277+
const artistCredit = cloneArrayDeep(entity.artistCredit.names);
278+
last(artistCredit).joinPhrase = match.joinPhrase;
279+
last(match.artistCredit).joinPhrase = '';
280+
281+
const guessedArtistCreditNames = [...artistCredit];
282+
for (const name of match.artistCredit) {
283+
guessedArtistCreditNames.push({
284+
artist: name.artist,
285+
joinPhrase: name.joinPhrase,
286+
name: name.name,
287+
});
288+
}
289+
290+
return {
291+
name: match.name,
292+
artistCreditNames: guessedArtistCreditNames,
293+
};
294+
}

root/static/scripts/edit/utility/similarity.js

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
/*
2+
* @flow
23
* Copyright (C) 2014 MetaBrainz Foundation
34
*
45
* This file is part of MusicBrainz, the open internet music database,
@@ -10,7 +11,7 @@ import leven from 'leven';
1011

1112
const punctuation = /[!"#$%&'()*+,\-.>/:;<=>?¿@[\\\]^_`{|}~\u2000-\u206F\s]/g;
1213

13-
function stripSpacesAndPunctuation(str) {
14+
function stripSpacesAndPunctuation(str?: string) {
1415
return (str || '').replace(punctuation, '').toLowerCase();
1516
}
1617

@@ -20,7 +21,7 @@ function stripSpacesAndPunctuation(str) {
2021
* the two strings.
2122
*/
2223

23-
export default function similarity(a, b) {
24+
export default function similarity(a?: string, b?: string): number {
2425
/*
2526
* If a track title is all punctuation, we'll end up with an empty
2627
* string, so just fall back to the original for comparison.

0 commit comments

Comments
 (0)