Skip to content
This repository was archived by the owner on Mar 1, 2025. It is now read-only.

Commit 14ae764

Browse files
committed
feat: extend MojiAdapter to support emoji variations and dataset generation
1 parent ac4dcc7 commit 14ae764

File tree

3 files changed

+106
-2
lines changed

3 files changed

+106
-2
lines changed

src/adapter/index.ts

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import type { EmojiGroup, EmojiSequence } from "../types";
1+
import type { EmojiGroup, EmojiSequence, EmojiVariation } from "../types";
22
import semver from "semver";
33

44
export interface MojiAdapter {
@@ -31,6 +31,16 @@ export interface MojiAdapter {
3131
* A function to generate the emoji sequences for the specified version
3232
*/
3333
sequences?: SequenceFn;
34+
35+
/**
36+
* A function to generate the emoji dataset for the specified version.
37+
*/
38+
emojis?: EmojiFn;
39+
40+
/**
41+
* A function to generate emoji variations for the specified version.
42+
*/
43+
variations?: EmojiVariationFn;
3444
}
3545

3646
export interface BaseAdapterContext {
@@ -40,6 +50,8 @@ export interface BaseAdapterContext {
4050

4151
/**
 * Adapter hook that asynchronously produces the emoji groups for the
 * given adapter context.
 */
export type GroupFn = (ctx: BaseAdapterContext) => Promise<EmojiGroup[]>;

/**
 * Adapter hook that asynchronously produces the emoji sequences for the
 * given adapter context, split into ZWJ sequences and all other sequences.
 */
export type SequenceFn = (ctx: BaseAdapterContext) => Promise<{
  zwj: EmojiSequence[];
  sequences: EmojiSequence[];
}>;

/**
 * Adapter hook that asynchronously produces the emoji dataset for the
 * given adapter context.
 *
 * NOTE(review): the resolved value is untyped (`any`); tighten this once
 * the dataset shape is settled.
 */
export type EmojiFn = (ctx: BaseAdapterContext) => Promise<any>;

/**
 * Adapter hook that asynchronously produces the emoji variations for the
 * given adapter context.
 */
export type EmojiVariationFn = (ctx: BaseAdapterContext) => Promise<EmojiVariation[]>;
4355

4456
/**
 * Module-level registry of adapters, keyed by a string.
 *
 * NOTE(review): presumably the key is the adapter's name — confirm against
 * the code that registers adapters.
 */
export const ADAPTERS = new Map<string, MojiAdapter>();
4557

src/adapter/v16.ts

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import type { EmojiSequence } from "../types";
1+
import type { EmojiSequence, EmojiVariation } from "../types";
22
import { defineMojiAdapter } from "../adapter";
33
import { FEMALE_SIGN, MALE_SIGN } from "../constants";
44
import { fetchCache } from "../utils/cache";
@@ -68,4 +68,52 @@ export default defineMojiAdapter({
6868
zwj: zwj || [],
6969
};
7070
},
71+
emojis: async ({ version, force }) => {
  // Placeholder: dataset generation is not implemented yet, so this
  // resolves to `undefined` without using `version` or `force`.
},
73+
/**
 * Loads `emoji-variation-sequences.txt` for `ctx.version` through
 * `fetchCache` and parses it into a list of `EmojiVariation` entries.
 *
 * Each data line has the form `<code points> ; <text|emoji> style ; # comment`.
 * Every line yields one entry whose `text` or `emoji` field (depending on
 * the style) holds the hyphen-joined code points; the other field is `null`.
 *
 * @throws Error when a data line lacks a code-point or style column, or
 * when the style is neither `text` nor `emoji`.
 */
variations: async (ctx) => {
  return fetchCache(`https://unicode.org/Public/${ctx.version}.0/ucd/emoji/emoji-variation-sequences.txt`, {
    cacheKey: `v${ctx.version}/variations.json`,
    parser(data) {
      const variations: EmojiVariation[] = [];

      for (const rawLine of data.split("\n")) {
        // Strip the trailing comment first so that full-line comments
        // (including indented ones) collapse to an empty string.
        const commentIndex = rawLine.indexOf("#");
        const line = (commentIndex === -1 ? rawLine : rawLine.slice(0, commentIndex)).trim();

        // skip empty lines & comment-only lines
        if (line === "") {
          continue;
        }

        const [hex, style] = line.split(";").map((col) => col.trim());

        // An empty code-point column is as invalid as a missing style column.
        if (!hex || style == null) {
          throw new Error(`invalid line: ${line}`);
        }

        // "0023 FE0E" -> "0023-FE0E"
        const hexcode = hex.replace(/\s+/g, "-");

        // "text style" -> "text", "emoji style" -> "emoji"
        const type = style.replace("style", "").trim();

        if (type !== "text" && type !== "emoji") {
          throw new Error(`invalid style: ${style}`);
        }

        variations.push({
          emoji: type === "emoji" ? hexcode : null,
          text: type === "text" ? hexcode : null,
          property: ["Emoji"],
        });
      }

      return variations;
    },
    // NOTE(review): presumably makes fetchCache ignore a cached copy when
    // the caller forces regeneration — confirm against fetchCache.
    bypassCache: ctx.force,
  });
},
71119
});

src/types.ts

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,3 +44,47 @@ export interface EmojiSequence {
4444
description: string;
4545
gender: string | null;
4646
}
47+
48+
/**
 * Names of the Unicode emoji properties and sequence types that an emoji
 * entry can be tagged with.
 */
export type Property =
  // A single emoji character.
  | "Basic_Emoji"
  | "Emoji"
  // How the emoji character should be displayed: as emoji or as text.
  | "Emoji_Presentation"
  // An emoji or unicode character used to build complex sequences
  // (hair style, skin tone, etc). Never used as a stand-alone emoji.
  | "Emoji_Component"
  // An emoji character that modifies the emoji base character before it.
  | "Emoji_Modifier"
  // An emoji character that a following emoji modifier can modify.
  | "Emoji_Modifier_Base"
  // A base followed by a modifier ("Emoji_Modifier_Base" + "Emoji_Modifier").
  | "Emoji_Modifier_Sequence"
  // A sequence of unicode characters for a key on a phone dial:
  // 0-9, *, # ("Key" + "FE0F" + "20E3").
  | "Emoji_Keycap_Sequence"
  // Two regional indicators forming a region flag (nation):
  // "Regional_Indicator" + "Regional_Indicator".
  | "Emoji_Flag_Sequence"
  // A sequence of characters that is neither a ZWJ nor a flag sequence.
  // Currently used for sub-region/division flags (country).
  | "Emoji_Tag_Sequence"
  // Several emoji characters joined with a zero-width-joiner (200D).
  | "Emoji_ZWJ_Sequence"
  // One of "Emoji", "Emoji" + "FE0F", or "Emoji_Keycap_Sequence".
  | "Emoji_Combining_Sequence"
  // An emoji slot reserved for future allocations and releases.
  | "Extended_Pictographic"
  // A unicode character for one of the 26 letters of the alphabet, A-Z.
  | "Regional_Indicator"
  // Property names as renamed from v13 onwards.
  | "RGI_Emoji_Flag_Sequence"
  | "RGI_Emoji_Modifier_Sequence"
  | "RGI_Emoji_Tag_Sequence"
  | "RGI_Emoji_ZWJ_Sequence";
85+
86+
/**
 * A single emoji variation entry: the text-style and/or emoji-style form
 * of an emoji, plus the properties it is tagged with.
 */
export interface EmojiVariation {
  /** The text-style form, or `null` when this entry has none. */
  text: string | null;

  /** The emoji-style form, or `null` when this entry has none. */
  emoji: string | null;

  /** Optional list of properties associated with this entry. */
  property?: Property[];
}

0 commit comments

Comments
 (0)