Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 41 additions & 13 deletions packages/core/src/metadata/tmdb.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { Headers } from 'undici';
import { Env, Cache, makeRequest, ParsedId, IdType } from '../utils/index.js';
import { Metadata } from './utils.js';
import { Metadata, TitleWithLanguage } from './utils.js';
import { z } from 'zod';

export type TMDBIdType = 'imdb_id' | 'tmdb_id' | 'tvdb_id';
Expand Down Expand Up @@ -219,7 +219,7 @@ export class TMDBMetadata {
private async fetchAlternativeTitles(
url: URL,
mediaType: string
): Promise<string[]> {
): Promise<{ titles: string[]; titlesWithLanguages: any[] }> {
const response = await makeRequest(url.toString(), {
timeout: 5000,
headers: this.getHeaders(),
Expand All @@ -235,17 +235,20 @@ export class TMDBMetadata {

if (mediaType === 'movie') {
const data = MovieAlternativeTitlesSchema.parse(json);
return data.titles.map((title) => title.title);
const titles = data.titles.map((title) => title.title);
// Alternative titles don't have language info, so we return empty array
return { titles, titlesWithLanguages: [] };
} else {
const data = TVAlternativeTitlesSchema.parse(json);
return data.results.map((title) => title.title);
const titles = data.results.map((title) => title.title);
return { titles, titlesWithLanguages: [] };
}
}

private async fetchTranslatedTitles(
url: URL,
mediaType: string
): Promise<string[]> {
): Promise<{ titles: string[]; titlesWithLanguages: TitleWithLanguage[] }> {
const response = await makeRequest(url.toString(), {
timeout: 5000,
headers: this.getHeaders(),
Expand All @@ -256,18 +259,38 @@ export class TMDBMetadata {
}

const json = await response.json();
const titlesWithLanguages: TitleWithLanguage[] = [];
const titles: string[] = [];

if (mediaType === 'movie') {
const data = MovieTranslationsSchema.parse(json);
return data.translations
.map((translation) => translation.data.name)
.filter(Boolean);
data.translations.forEach((translation) => {
if (translation.data.name) {
titles.push(translation.data.name);
titlesWithLanguages.push({
title: translation.data.name,
iso_639_1: translation.iso_639_1,
iso_3166_1: translation.iso_3166_1,
english_name: translation.english_name,
});
}
});
} else {
const data = TVTranslationsSchema.parse(json);
return data.translations
.map((translation) => translation.data.title)
.filter(Boolean);
data.translations.forEach((translation) => {
if (translation.data.title) {
titles.push(translation.data.title);
titlesWithLanguages.push({
title: translation.data.title,
iso_639_1: translation.iso_639_1,
iso_3166_1: translation.iso_3166_1,
english_name: translation.english_name,
});
}
});
}

return { titles, titlesWithLanguages };
}

public async getMetadata(parsedId: ParsedId): Promise<Metadata> {
Expand Down Expand Up @@ -371,12 +394,16 @@ export class TMDBMetadata {
this.fetchTranslatedTitles(translatedTitlesUrl, parsedId.mediaType),
]);

const allTitlesWithLanguages: TitleWithLanguage[] = [];

if (altTitlesResult.status === 'fulfilled') {
allTitles.push(...altTitlesResult.value);
allTitles.push(...altTitlesResult.value.titles);
allTitlesWithLanguages.push(...altTitlesResult.value.titlesWithLanguages);
}

if (translationsResult.status === 'fulfilled') {
allTitles.push(...translationsResult.value);
allTitles.push(...translationsResult.value.titles);
allTitlesWithLanguages.push(...translationsResult.value.titlesWithLanguages);
}

// If both requests failed, we should throw an error
Expand All @@ -393,6 +420,7 @@ export class TMDBMetadata {
const metadata: Metadata = {
title: primaryTitle,
titles: uniqueTitles,
titlesWithLanguages: allTitlesWithLanguages,
releaseDate: releaseDate,
year: Number(year),
yearEnd: yearEnd ? Number(yearEnd) : undefined,
Expand Down
8 changes: 8 additions & 0 deletions packages/core/src/metadata/utils.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
/**
 * A single title paired with the language information of the translation it
 * came from. Entries are collected by the TMDB metadata provider and consumed
 * by `inferLanguageFromTitle` to guess a stream's language from its filename.
 */
export interface TitleWithLanguage {
/** The title text in this translation (taken from the translation's name/title field). */
title: string;
/** Language code of the translation; compared against `iso_639_1` of the language mapping table. */
iso_639_1: string;
/** Optional region code of the translation — presumably an ISO 3166-1 country code; confirm against the TMDB schema. */
iso_3166_1?: string;
/** Copied from the translation's `english_name` — presumably the English name of the language; confirm against the TMDB schema. */
english_name: string;
}

export interface Metadata {
title: string;
titles?: string[];
titlesWithLanguages?: TitleWithLanguage[];
year?: number;
yearEnd?: number;
releaseDate?: string;
Expand Down
38 changes: 38 additions & 0 deletions packages/core/src/streams/filterer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,44 @@ class StreamFilterer {
}
}

// Infer languages from TMDB metadata for streams with unknown languages
if (requestedMetadata?.titlesWithLanguages && requestedMetadata.titlesWithLanguages.length > 0) {
logger.debug(`Applying language inference from TMDB metadata for ${streams.length} streams`);
const { inferLanguageFromTitle } = await import('../utils/languages.js');
let inferredCount = 0;

for (const stream of streams) {
if (!stream.parsedFile || !stream.filename) {
continue;
}

// Check if stream has no languages or only Unknown language
const currentLanguages = stream.parsedFile.languages || [];
const hasUnknownLanguage = currentLanguages.length === 0 ||
(currentLanguages.length === 1 && currentLanguages[0] === 'Unknown');

if (hasUnknownLanguage) {
const inferredLanguages = inferLanguageFromTitle(
stream.filename,
requestedMetadata.titlesWithLanguages,
currentLanguages
);

if (inferredLanguages.length > 0) {
stream.parsedFile.languages = inferredLanguages;
inferredCount++;
logger.debug(
`Inferred language for stream "${stream.filename}": ${inferredLanguages.join(', ')}`
);
}
}
}

if (inferredCount > 0) {
logger.info(`Inferred languages for ${inferredCount} streams from TMDB metadata`);
}
}

const normaliseTitle = (title: string) => {
return title
.normalize('NFD')
Expand Down
106 changes: 106 additions & 0 deletions packages/core/src/utils/languages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1995,3 +1995,109 @@ export const FULL_LANGUAGE_MAPPING = [
name: 'Asturianu',
},
];

/**
 * Normalizes a title (or a filename) into a canonical form for comparison.
 *
 * The result is lower-cased, contains only letters and digits, and has every
 * run of other characters collapsed into a single space, with no leading or
 * trailing space.
 *
 * @param title - Raw title or filename text
 * @returns The normalized text; may be an empty string when the input
 *   contains no letters or digits
 */
function normalizeTitle(title: string): string {
  return (
    title
      // Compose first: in decomposed (NFD) text an accent is a separate
      // combining mark, which is neither a letter nor a digit and would
      // otherwise be turned into a space, splitting the word in two.
      .normalize('NFC')
      .toLowerCase()
      .replace(/[^\p{L}\p{N}]/gu, ' ')
      .replace(/\s+/g, ' ')
      .trim()
  );
}

/**
 * Matches a single character belonging to a script that is conventionally
 * written without spaces between words. Next to such a character a missing
 * space says nothing about word boundaries.
 */
const UNSPACED_SCRIPT_CHAR =
  /^[\p{Script_Extensions=Han}\p{Script_Extensions=Hiragana}\p{Script_Extensions=Katakana}\p{Script_Extensions=Thai}\p{Script_Extensions=Lao}\p{Script_Extensions=Khmer}\p{Script_Extensions=Myanmar}]$/u;

/** Returns the code point that ends right before `end`, or '' at the start of the text. */
function charEndingAt(text: string, end: number): string {
  if (end <= 0) {
    return '';
  }
  const unit = text.charCodeAt(end - 1);
  const isLowSurrogate = unit >= 0xdc00 && unit <= 0xdfff;
  return text.slice(isLowSurrogate && end >= 2 ? end - 2 : end - 1, end);
}

/** Returns the code point that starts at `start`, or '' at the end of the text. */
function charStartingAt(text: string, start: number): string {
  const codePoint = text.codePointAt(start);
  return codePoint === undefined ? '' : String.fromCodePoint(codePoint);
}

/** Tells whether the junction between `outside` (filename side) and `inside` (title edge) is a word boundary. */
function isTitleBoundary(outside: string, inside: string): boolean {
  return (
    outside === '' ||
    outside === ' ' ||
    UNSPACED_SCRIPT_CHAR.test(outside) ||
    UNSPACED_SCRIPT_CHAR.test(inside)
  );
}

/** Tells whether a normalized title occurs in a normalized filename as whole words rather than inside another word. */
function containsWholeTitle(
  normalizedFilename: string,
  normalizedTitle: string
): boolean {
  // ''.includes-style matching would accept every filename.
  if (normalizedTitle === '') {
    return false;
  }

  const titleFirst = charStartingAt(normalizedTitle, 0);
  const titleLast = charEndingAt(normalizedTitle, normalizedTitle.length);

  let start = normalizedFilename.indexOf(normalizedTitle);
  while (start !== -1) {
    const end = start + normalizedTitle.length;
    if (
      isTitleBoundary(charEndingAt(normalizedFilename, start), titleFirst) &&
      isTitleBoundary(charStartingAt(normalizedFilename, end), titleLast)
    ) {
      return true;
    }
    start = normalizedFilename.indexOf(normalizedTitle, start + 1);
  }
  return false;
}

/**
 * Infers the language(s) of a stream based on its filename and title translations from TMDB.
 *
 * Resolution rules, in order:
 * 1. If `existingLanguages` is non-empty and does not contain `'Unknown'`, it is returned unchanged.
 * 2. Every translated title that occurs in the filename as whole words is collected
 *    (titles that normalize to an empty string are ignored).
 * 3. No match yields `[]`.
 * 4. Matches in several languages yield `['Multi']` when the matched titles differ,
 *    and `[]` when it is the same title shared by several languages (ambiguous).
 * 5. Matches in exactly one language yield that language's name from `FULL_LANGUAGE_MAPPING`
 *    (preferring an entry with `flag_priority`), with any parenthesised suffix removed.
 *
 * `iso_3166_1` is accepted for interface compatibility but does not influence the result.
 *
 * @param filename - The filename to analyze
 * @param titlesWithLanguages - Array of title/language pairs from TMDB
 * @param existingLanguages - Any languages already detected from the filename
 * @returns Array of inferred language names, or empty array if inference is not possible
 */
export function inferLanguageFromTitle(
  filename: string,
  titlesWithLanguages: Array<{
    title: string;
    iso_639_1: string;
    iso_3166_1?: string;
    english_name: string;
  }>,
  existingLanguages: string[] = []
): string[] {
  // If explicit language tags exist, prioritize them
  if (existingLanguages.length > 0 && !existingLanguages.includes('Unknown')) {
    return existingLanguages;
  }

  if (!titlesWithLanguages || titlesWithLanguages.length === 0) {
    return [];
  }

  const normalizedFilename = normalizeTitle(filename);

  const matchedLanguageCodes = new Set<string>();
  const matchedTitles = new Set<string>();

  for (const item of titlesWithLanguages) {
    const normalizedTitle = normalizeTitle(item.title);
    if (containsWholeTitle(normalizedFilename, normalizedTitle)) {
      matchedLanguageCodes.add(item.iso_639_1);
      matchedTitles.add(normalizedTitle);
    }
  }

  if (matchedLanguageCodes.size === 0) {
    return [];
  }

  if (matchedLanguageCodes.size > 1) {
    // Different titles in different languages => multi-language release.
    // One title shared by several languages => ambiguous, keep as Unknown.
    return matchedTitles.size > 1 ? ['Multi'] : [];
  }

  const [langCode] = Array.from(matchedLanguageCodes);

  // Map ISO 639-1 code to the language name
  const languageEntry =
    FULL_LANGUAGE_MAPPING.find(
      (lang) => lang.iso_639_1 === langCode && lang.flag_priority
    ) || FULL_LANGUAGE_MAPPING.find((lang) => lang.iso_639_1 === langCode);

  if (languageEntry) {
    const languageName = (
      languageEntry.internal_english_name || languageEntry.english_name
    )
      ?.split('(')?.[0]
      ?.trim();

    if (languageName) {
      return [languageName];
    }
  }

  return [];
}