Skip to content

Commit 49d5d35

Browse files
committed
fix(transfer): Improve duplicate detection and error handling
1 parent d87ae5f commit 49d5d35

File tree

7 files changed

+239
-22
lines changed

7 files changed

+239
-22
lines changed

src/backend/common/vendor/koito/KoitoApiClient.ts

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,37 @@ export class KoitoApiClient extends AbstractApiClient {
144144
}
145145
}
146146

147+
getUserListensWithPagination = async (options: {
148+
count?: number;
149+
minTs?: number;
150+
maxTs?: number;
151+
} = {}): Promise<ListensResponse> => {
152+
const { count = 100, minTs, maxTs } = options;
153+
154+
try {
155+
const query: any = { count };
156+
if (minTs !== undefined) {
157+
query.min_ts = minTs;
158+
}
159+
if (maxTs !== undefined) {
160+
query.max_ts = maxTs;
161+
}
162+
163+
const resp = await this.callApi(request
164+
.get(`${joinedUrl(this.url.url, '/apis/listenbrainz/1/user', this.config.username, 'listens')}`)
165+
.timeout({
166+
response: 15000,
167+
deadline: 30000
168+
})
169+
.query(query));
170+
171+
const { body: { payload } } = resp as any;
172+
return payload as ListensResponse;
173+
} catch (e) {
174+
throw e;
175+
}
176+
}
177+
147178
getRecentlyPlayed = async (maxTracks: number): Promise<PlayObject[]> => {
148179
try {
149180
const resp = await this.getUserListens(maxTracks);

src/backend/scrobblers/LastfmScrobbler.ts

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import { Logger } from "@foxxmd/logging";
2+
import dayjs, { Dayjs } from "dayjs";
23
import EventEmitter from "events";
34
import { NowPlayingResponse, TrackScrobbleResponse, UserGetRecentTracksResponse } from "lastfm-node-client";
45
import { PlayObject } from "../../core/Atomic.js";
@@ -96,6 +97,51 @@ export default class LastfmScrobbler extends AbstractScrobbleClient {
9697
}
9798
}
9899

100+
getScrobblesForTimeRange = async (fromDate?: Dayjs, toDate?: Dayjs, limit: number = 1000): Promise<PlayObject[]> => {
101+
const allPlays: PlayObject[] = [];
102+
let currentPage = 1;
103+
const perPage = 200;
104+
105+
while (allPlays.length < limit) {
106+
const resp = await this.api.getRecentTracksWithPagination({
107+
page: currentPage,
108+
limit: perPage,
109+
from: fromDate?.unix(),
110+
to: toDate?.unix(),
111+
});
112+
113+
const {
114+
recenttracks: {
115+
track: rawTracks = [],
116+
'@attr': pageInfo
117+
} = {}
118+
} = resp;
119+
120+
if (rawTracks.length === 0) {
121+
break;
122+
}
123+
124+
const plays = rawTracks
125+
.filter(t => t.date !== undefined)
126+
.map(t => LastfmApiClient.formatPlayObj(t))
127+
.filter(p => p.data.playDate && p.data.playDate.isValid()); // Filter out plays with invalid dates
128+
129+
allPlays.push(...plays);
130+
131+
if (allPlays.length >= limit) {
132+
break;
133+
}
134+
135+
if (pageInfo && currentPage >= parseInt(pageInfo.totalPages, 10)) {
136+
break;
137+
}
138+
139+
currentPage++;
140+
}
141+
142+
return allPlays.slice(0, limit);
143+
}
144+
99145
cleanSourceSearchTitle = (playObj: PlayObject) => {
100146
const {
101147
data: {

src/backend/scrobblers/ListenbrainzScrobbler.ts

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import { Logger } from "@foxxmd/logging";
2+
import dayjs, { Dayjs } from "dayjs";
23
import EventEmitter from "events";
34
import { PlayObject } from "../../core/Atomic.js";
45
import { buildTrackString, capitalize } from "../../core/StringUtils.js";
@@ -67,6 +68,44 @@ export default class ListenbrainzScrobbler extends AbstractScrobbleClient {
6768
return await this.api.getRecentlyPlayed(limit);
6869
}
6970

71+
getScrobblesForTimeRange = async (fromDate?: Dayjs, toDate?: Dayjs, limit: number = 1000): Promise<PlayObject[]> => {
72+
const allPlays: PlayObject[] = [];
73+
let maxTs = toDate?.unix();
74+
const minTs = fromDate?.unix();
75+
76+
while (allPlays.length < limit) {
77+
const batchSize = Math.min(100, limit - allPlays.length);
78+
const resp = await this.api.getUserListensWithPagination({
79+
count: batchSize,
80+
minTs,
81+
maxTs,
82+
});
83+
84+
if (!resp.listens || resp.listens.length === 0) {
85+
break;
86+
}
87+
88+
const plays = resp.listens
89+
.map(l => ListenbrainzApiClient.formatPlayObj(l, {}))
90+
.filter(p => p.data.playDate && p.data.playDate.isValid()); // Filter out plays with invalid dates
91+
92+
allPlays.push(...plays);
93+
94+
if (plays.length < batchSize) {
95+
break;
96+
}
97+
98+
const oldestPlay = plays[plays.length - 1];
99+
if (oldestPlay.data.playDate) {
100+
maxTs = oldestPlay.data.playDate.unix() - 1;
101+
} else {
102+
break;
103+
}
104+
}
105+
106+
return allPlays;
107+
}
108+
70109
alreadyScrobbled = async (playObj: PlayObject, log = false) => (await this.existingScrobble(playObj)) !== undefined
71110

72111
public playToClientPayload(playObj: PlayObject): ListenPayload {

src/backend/transfer/TransferJob.ts

Lines changed: 70 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -197,21 +197,12 @@ export class TransferJob {
197197
} = resp;
198198

199199
if (pageInfo && !totalPagesKnown) {
200-
const totalPages = parseInt(pageInfo.totalPages, 10);
201-
const total = parseInt(pageInfo.total, 10);
202-
203-
// If playCount is specified, cap the total at playCount
204-
if (this.playCount) {
205-
this.progress.total = Math.min(this.playCount, total);
206-
// Calculate how many pages we'll actually need
207-
this.progress.totalPages = Math.ceil(this.progress.total / this.PAGE_SIZE);
208-
} else {
209-
this.progress.total = total;
210-
this.progress.totalPages = totalPages;
211-
}
200+
const apiTotal = parseInt(pageInfo.total, 10);
201+
this.progress.total = this.playCount ? Math.min(this.playCount, apiTotal) : apiTotal;
202+
this.progress.totalPages = Math.ceil(this.progress.total / this.PAGE_SIZE);
212203

213204
totalPagesKnown = true;
214-
this.logger.info(`Total pages in source: ${totalPages}, Total plays in source: ${total}, Will transfer: ${this.progress.total}, Expected pages: ${this.progress.totalPages}`);
205+
this.logger.info(`Total plays in source: ${apiTotal}, Will transfer: ${this.progress.total}, Expected pages: ${this.progress.totalPages}`);
215206
}
216207

217208
if (rawTracks.length === 0) {
@@ -313,6 +304,8 @@ export class TransferJob {
313304
await this.processPlaysWithSlidingWindow(sortedPlays, client);
314305
}
315306

307+
private timeRangeScrobbles: PlayObject[] = [];
308+
316309
private async processPlaysWithSlidingWindow(plays: PlayObject[], client: AbstractScrobbleClient): Promise<void> {
317310
if (plays.length === 0) {
318311
return;
@@ -321,11 +314,33 @@ export class TransferJob {
321314
const oldest = plays[0].data.playDate;
322315
const newest = plays[plays.length - 1].data.playDate;
323316

317+
// Fetch scrobbles for the specific time range being processed
324318
if (oldest && newest) {
325-
this.logger.debug(`Refreshing client scrobbles for time range: ${oldest.format()} to ${newest.format()}`);
326-
await client.refreshScrobbles(this.SLIDING_WINDOW_SIZE);
319+
this.logger.debug(`Fetching client scrobbles for time range: ${oldest.format()} to ${newest.format()}`);
320+
321+
// Check if client supports time-range fetching
322+
if ('getScrobblesForTimeRange' in client && typeof (client as any).getScrobblesForTimeRange === 'function') {
323+
try {
324+
// Add a buffer before/after to catch nearby scrobbles
325+
const bufferHours = 24;
326+
const fromDate = oldest.subtract(bufferHours, 'hours');
327+
const toDate = newest.add(bufferHours, 'hours');
328+
329+
this.timeRangeScrobbles = await (client as any).getScrobblesForTimeRange(fromDate, toDate, this.SLIDING_WINDOW_SIZE);
330+
this.logger.debug(`Fetched ${this.timeRangeScrobbles.length} scrobbles from ${this.clientName} for duplicate detection`);
331+
} catch (e) {
332+
this.logger.error(`Error fetching scrobbles for time range: ${e.message}`);
333+
throw e;
334+
}
335+
} else {
336+
// Fallback to regular refresh (will only work for recent scrobbles)
337+
this.logger.warn('Client does not support time-range fetching, falling back to regular refresh (may not detect duplicates for old scrobbles)');
338+
await client.refreshScrobbles(this.SLIDING_WINDOW_SIZE);
339+
this.timeRangeScrobbles = [];
340+
}
327341
} else {
328342
await client.refreshScrobbles(this.SLIDING_WINDOW_SIZE);
343+
this.timeRangeScrobbles = [];
329344
}
330345

331346
for (let i = 0; i < plays.length; i++) {
@@ -358,6 +373,37 @@ export class TransferJob {
358373
this.progress.currentTrack = undefined;
359374
}
360375

376+
private isAlreadyScrobbledInTimeRange(play: PlayObject): boolean {
377+
if (this.timeRangeScrobbles.length === 0) {
378+
return false;
379+
}
380+
381+
const playDate = play.data.playDate;
382+
if (!playDate) {
383+
return false;
384+
}
385+
386+
// Check for matching scrobble (same track, artist, and similar timestamp)
387+
return this.timeRangeScrobbles.some(scrobbled => {
388+
const scrobbledDate = scrobbled.data.playDate;
389+
if (!scrobbledDate) {
390+
return false;
391+
}
392+
393+
// Check if timestamps are within 30 seconds of each other
394+
const timeDiffSeconds = Math.abs(playDate.diff(scrobbledDate, 'second'));
395+
if (timeDiffSeconds > 30) {
396+
return false;
397+
}
398+
399+
// Check if track and artists match
400+
const trackMatch = play.data.track?.toLowerCase() === scrobbled.data.track?.toLowerCase();
401+
const artistsMatch = play.data.artists?.join(',').toLowerCase() === scrobbled.data.artists?.join(',').toLowerCase();
402+
403+
return trackMatch && artistsMatch;
404+
});
405+
}
406+
361407
private async processPlay(play: PlayObject, client: AbstractScrobbleClient): Promise<void> {
362408
const transformedPlay = client.transformPlay(play, TRANSFORM_HOOK.preCompare);
363409

@@ -366,9 +412,17 @@ export class TransferJob {
366412
// 2. timeFrameIsValid is designed to prevent re-scrobbling during normal operation
367413
// 3. We have our own duplicate detection via time-range fetching
368414

415+
// Check against our time-range-specific scrobbles (the actual duplicate detection)
416+
if (this.isAlreadyScrobbledInTimeRange(transformedPlay)) {
417+
this.logger.verbose(`DUPLICATE (time-range): ${buildTrackString(play)} - found in ${this.clientName}'s ${this.timeRangeScrobbles.length} scrobbles`);
418+
this.progress.duplicates++;
419+
return;
420+
}
421+
422+
// Check using the client's built-in method (for recently added scrobbles from this transfer)
369423
const alreadyScrobbled = await client.alreadyScrobbled(transformedPlay);
370424
if (alreadyScrobbled) {
371-
this.logger.debug(`Skipping ${buildTrackString(play)}: already scrobbled`);
425+
this.logger.verbose(`DUPLICATE (recent): ${buildTrackString(play)} - found in ${this.clientName}'s recent scrobbles`);
372426
this.progress.duplicates++;
373427
return;
374428
}

src/backend/transfer/TransferManager.ts

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,39 @@ export class TransferManager {
129129
throw new Error('Play count must be greater than 0');
130130
}
131131

132+
// Validate date range if provided
133+
if (fromDate || toDate) {
134+
const now = new Date();
135+
136+
if (fromDate) {
137+
const from = new Date(fromDate);
138+
if (isNaN(from.getTime())) {
139+
throw new Error('Invalid fromDate format');
140+
}
141+
if (from > now) {
142+
throw new Error('fromDate cannot be in the future');
143+
}
144+
}
145+
146+
if (toDate) {
147+
const to = new Date(toDate);
148+
if (isNaN(to.getTime())) {
149+
throw new Error('Invalid toDate format');
150+
}
151+
if (to > now) {
152+
throw new Error('toDate cannot be in the future');
153+
}
154+
}
155+
156+
if (fromDate && toDate) {
157+
const from = new Date(fromDate);
158+
const to = new Date(toDate);
159+
if (from > to) {
160+
throw new Error('fromDate must be before toDate');
161+
}
162+
}
163+
}
164+
132165
const source = this.scrobbleSources.getByName(sourceName);
133166
if (!source) {
134167
throw new Error(`Source '${sourceName}' not found`);
@@ -147,6 +180,11 @@ export class TransferManager {
147180
throw new Error(`Client '${clientName}' is not ready`);
148181
}
149182

183+
// Check if client supports time-range fetching (required for duplicate detection)
184+
if (!('getScrobblesForTimeRange' in client) || typeof (client as any).getScrobblesForTimeRange !== 'function') {
185+
throw new Error(`Client '${clientName}' does not support time-range fetching, which is required for accurate duplicate detection during transfers. Supported clients: Last.fm, Listenbrainz`);
186+
}
187+
150188
if (playCount !== undefined && playCount > 10000) {
151189
this.logger.warn(`Play count ${playCount} is very large. This may take a long time.`);
152190
}

src/backend/utils/StringUtils.ts

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ export const SYMBOLS_WHITESPACE_REGEX = new RegExp(/[`=(){}<>;'’,.~!@#$%^&*_+|
1212
export const SYMBOLS_REGEX = new RegExp(/[`=(){}<>;',.~!@#$%^&*_+|:"?\-\\[\]/]/g);
1313

1414
export const MULTI_WHITESPACE_REGEX = new RegExp(/\s{2,}/g);
15-
export const uniqueNormalizedStrArr = (arr: string[]): string[] => arr.reduce((acc: string[], curr) => {
15+
export const uniqueNormalizedStrArr = (arr: string[]): string[] => arr.filter(x => x != null).reduce((acc: string[], curr) => {
1616
const normalizedCurr = normalizeStr(curr)
1717
if (!acc.some(x => normalizeStr(x) === normalizedCurr)) {
1818
return acc.concat(curr);
@@ -21,6 +21,9 @@ export const uniqueNormalizedStrArr = (arr: string[]): string[] => arr.reduce((a
2121
}, [])
2222
// https://stackoverflow.com/a/37511463/1469797
2323
export const normalizeStr = (str: string, options?: {keepSingleWhitespace?: boolean}): string => {
24+
if (!str) {
25+
return '';
26+
}
2427
const {keepSingleWhitespace = false} = options || {};
2528
const normal = str.normalize('NFD').replace(/[\u0300-\u036f]/g, "");
2629
if(!keepSingleWhitespace) {
@@ -96,7 +99,7 @@ export const PRIMARY_SECONDARY_SECTIONS_REGEX = new RegExp(/^(?<primary>.+?)(?<s
9699
* */
97100
// export const SECONDARY_ARTISTS_REGEX = new RegExp(//ig);
98101
export const parseCredits = (str: string, delimiters?: boolean | string[]): PlayCredits => {
99-
if (str.trim() === '') {
102+
if (!str || str.trim() === '') {
100103
return undefined;
101104
}
102105

@@ -136,7 +139,7 @@ export const parseCredits = (str: string, delimiters?: boolean | string[]): Play
136139
return undefined;
137140
}
138141
export const parseArtistCredits = (str: string, delimiters?: boolean | string[], ignoreGlobalAmpersand?: boolean): PlayCredits | undefined => {
139-
if (str.trim() === '') {
142+
if (!str || str.trim() === '') {
140143
return undefined;
141144
}
142145
let delims: string[] | undefined;

src/backend/utils/TimeUtils.ts

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,16 @@ dayjs.extend(isToday);
3434
export const temporalPlayComparisonSummary = (data: TemporalPlayComparison, existingPlay?: PlayObject, candidatePlay?: PlayObject) => {
3535
const parts: string[] = [];
3636
if (existingPlay !== undefined && candidatePlay !== undefined) {
37-
if (existingPlay.data.playDate.isSame(candidatePlay.data.playDate, 'day')) {
38-
parts.push(`Existing: ${existingPlay.data.playDate.format('HH:mm:ssZ')} - Candidate: ${candidatePlay.data.playDate.format('HH:mm:ssZ')}`);
37+
const existingDate = existingPlay.data.playDate;
38+
const candidateDate = candidatePlay.data.playDate;
39+
40+
// Check if dates are valid before comparing/formatting
41+
if (!existingDate?.isValid() || !candidateDate?.isValid()) {
42+
parts.push(`Existing: ${existingDate?.isValid() ? existingDate.toISOString() : 'Invalid Date'} - Candidate: ${candidateDate?.isValid() ? candidateDate.toISOString() : 'Invalid Date'}`);
43+
} else if (existingDate.isSame(candidateDate, 'day')) {
44+
parts.push(`Existing: ${existingDate.format('HH:mm:ssZ')} - Candidate: ${candidateDate.format('HH:mm:ssZ')}`);
3945
} else {
40-
parts.push(`Existing: ${existingPlay.data.playDate.toISOString()} - Candidate: ${candidatePlay.data.playDate.toISOString()}`);
46+
parts.push(`Existing: ${existingDate.toISOString()} - Candidate: ${candidateDate.toISOString()}`);
4147
}
4248
}
4349
parts.push(`Temporal Sameness: ${capitalize(temporalAccuracyToString(data.match))}`);

0 commit comments

Comments
 (0)