Skip to content

Commit 6fd789a

Browse files
committed
Adapt UnicodeSearch into EmojiSearch
1 parent 94c4f3e commit 6fd789a

File tree

10 files changed

+112
-547
lines changed

10 files changed

+112
-547
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ dist
44
*.env
55
*.log
66
node_modules
7-
public/ucd.json
7+
public/emoji.json
88
tmp
99
*.tmp
1010

LICENSE.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -629,7 +629,7 @@ to attach them to the start of each source file to most effectively
629629
state the exclusion of warranty; and each file should have at least
630630
the "copyright" line and a pointer to where the full notice is found.
631631

632-
UnicodeSearch - search through the Unicode character database
632+
EmojiSearch - search through Emoji
633633
Copyright (C) 2025 by Andrew Marcuse
634634

635635
This program is free software: you can redistribute it and/or modify

README.md

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
1-
# UnicodeSearch <img alt="VectorLogoZone logo" src="public/favicon.svg" height="90" align="right" />
1+
# EmojiSearch <img alt="VectorLogoZone logo" src="public/favicon.svg" height="90" align="right" />
22

3-
[![deploy](https://github.com/FileFormatInfo/unicodesearch/actions/workflows/ghpages-deploy.yaml/badge.svg)](https://github.com/FileFormatInfo/unicodesearch/actions/workflows/ghpages-deploy.yaml)
3+
[![deploy](https://github.com/FileFormatInfo/emojisearch/actions/workflows/ghpages-deploy.yaml/badge.svg)](https://github.com/FileFormatInfo/emojisearch/actions/workflows/ghpages-deploy.yaml)
44

5-
A web app for searching and browsing the Unicode character database.
5+
A web app for searching and browsing the Unicode emoji database.
66

7-
See it in action: [unicodesearch.org](https://www.unicodesearch.org/)
7+
See it in action: [emojisearch.org](https://www.emojisearch.org/)
88

99
## Links
1010

11-
- [Unicode Character Database](https://www.unicode.org/Public/17.0.0/ucd/) - XML data source for this app.
12-
- [Unicode Annex #42](https://www.unicode.org/reports/tr42/) - what the fields in the UCD mean.
11+
- [Emoji at the Unicode Character Database](https://www.unicode.org/Public/UCD/latest/ucd/emoji/) - data source for this app.
12+
- [Unicode Technical Standard #51: Emoji](https://unicode.org/reports/tr51/) - what the fields in the UCD mean.
1313

1414
## Credits
1515

@@ -19,20 +19,14 @@ See it in action: [unicodesearch.org](https://www.unicodesearch.org/)
1919
[![Google Noto Emoji](https://www.vectorlogo.zone/logos/google/google-ar21.svg)](https://github.com/googlefonts/noto-emoji/blob/43f47be9404018cd9d8f73a227363a8f20acdab5/svg/emoji_u1f984.svg "Icon")
2020
[![Node.js](https://www.vectorlogo.zone/logos/nodejs/nodejs-ar21.svg)](https://nodejs.org/ "Application Server")
2121
[![npm](https://www.vectorlogo.zone/logos/npmjs/npmjs-ar21.svg)](https://www.npmjs.com/ "JS Package Management")
22-
[![Unicode](https://www.vectorlogo.zone/logos/unicode/unicode-ar21.svg)](https://www.unicode.org/Public/17.0.0/ucdxml/ "Unicode Character Database")
22+
[![Unicode](https://www.vectorlogo.zone/logos/unicode/unicode-ar21.svg)](https://www.unicode.org/Public/17.0.0/ "Unicode Character Database")
2323
[![Tabulator](https://www.vectorlogo.zone/logos/tabulatorinfo/tabulatorinfo-ar21.svg)](https://tabulator.info/ "Grid widget")
2424
[![TypeScript](https://www.vectorlogo.zone/logos/typescriptlang/typescriptlang-ar21.svg)](https://www.typescriptlang.org/ "Programming Language")
2525
[![VectorLogoZone](https://www.vectorlogo.zone/logos/vectorlogozone/vectorlogozone-ar21.svg)](https://www.vectorlogo.zone/ "Logos")
2626
[![Vite](https://www.vectorlogo.zone/logos/vitejsdev/vitejsdev-ar21.svg)](https://vitejs.dev/ "Bundler")
2727

2828
## To Do
2929

30-
- [ ] handle `#` in name
31-
- [ ] accurate total count
32-
- [ ] preview column (SVGs)
3330
- [ ] bigger font for example column
34-
- [ ] status (for filter/sort) in footer
3531
- [ ] test dark mode
3632
- [ ] share links in footer
37-
- [ ] tags: bidi, letter, number
38-
- [ ] tag filters

bin/ucd_download.sh

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#!/usr/bin/env bash
22
#
3-
# Download and unzip the Unicode Character Database XML file
3+
# Download the emoji test file
44
# from Unicode.org.
55
#
66

@@ -23,12 +23,9 @@ fi
2323

2424
curl \
2525
--location \
26-
--output "${TMP_DIR}/ucd.all.flat.zip" \
26+
--output "${TMP_DIR}/emoji-test.txt" \
2727
--show-error \
2828
--silent \
29-
https://www.unicode.org/Public/latest/ucdxml/ucd.all.flat.zip
30-
31-
cd "${TMP_DIR}"
32-
unzip ucd.all.flat.zip
29+
https://www.unicode.org/Public/17.0.0/emoji/emoji-test.txt
3330

3431
echo "INFO: completed download at $(date -u +"%Y-%m-%dT%H:%M:%SZ")"

bin/ucd_to_json.ts

Lines changed: 48 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -9,105 +9,83 @@ const __filename = fileURLToPath(import.meta.url);
99
const __dirname = path.dirname(__filename);
1010

1111
type SearchEntry = {
12-
code: string;
13-
name: string;
14-
age: string;
15-
block: string;
16-
category: string;
17-
script: string;
18-
tags?: string[];
12+
codepoints: string;
13+
qualification: string;
14+
version: string;
15+
emoji: string;
16+
description: string;
17+
group: string;
18+
subgroup: string;
1919
}
2020

2121
type SearchData = {
2222
success: boolean;
23+
lastmod: string;
2324
data: SearchEntry[];
2425
}
2526

2627
async function main() {
2728
console.log(`INFO: starting at ${new Date().toISOString()}`);
2829

29-
const xmlPath = path.join( __dirname, '..', 'tmp', 'ucd.all.flat.xml' );
30-
const jsonPath = path.join( __dirname, '..', 'public', 'ucd.json' );
30+
const txtPath = path.join( __dirname, '..', 'tmp', 'emoji-test.txt' );
31+
const jsonPath = path.join( __dirname, '..', 'public', 'emoji.json' );
3132

3233
try {
33-
await fs.access(xmlPath);
34+
await fs.access(txtPath);
3435
} catch (err) {
35-
console.log(`INFO: XML file does not exist in ${xmlPath}`);
36+
console.log(`INFO: txt file does not exist in ${txtPath}`);
3637
process.exit(1);
3738
}
3839

3940
// Read and parse the emoji-test.txt file
40-
console.log(`INFO: reading XML file from ${xmlPath}`);
41-
const xmlData = await fs.readFile(xmlPath, 'utf-8');
42-
console.log(`INFO: parsing XML data`);
43-
const parser = new XMLParser({
44-
ignoreAttributes: false,
45-
attributeNamePrefix: '',
46-
});
47-
const jsonObj = parser.parse(xmlData);
48-
49-
console.log(`INFO: parsed ${jsonObj.ucd.repertoire.char.length} characters`);
50-
51-
if (true) {
52-
fs.writeFile(
53-
path.join(__dirname, "..", "tmp", "ucd.all.flat.json"),
54-
JSON.stringify(jsonObj, null, 2),
55-
"utf-8"
56-
);
57-
}
58-
59-
console.log(`INFO: generating JSON data`);
60-
const entries: SearchEntry[] = [];
61-
62-
for (const charData of jsonObj.ucd.repertoire.char) {
63-
64-
if (!charData.cp || charData.cp.length === 0) {
65-
if (!charData['first-cp']) { // some private use area ranges mixed in
66-
console.log(`WARN: skipping entry with no code point (${JSON.stringify(charData)})`);
67-
}
41+
console.log(`INFO: reading XML file from ${txtPath}`);
42+
const txtData = await fs.readFile(txtPath, 'utf-8');
43+
console.log(`INFO: parsing txt data`);
44+
45+
const data: SearchEntry[] = [];
46+
let currentGroup = '';
47+
let currentSubgroup = '';
48+
49+
const lines = txtData.split(/\r?\n/);
50+
for (const line of lines) {
51+
// console.log(`LINE: ${line}`);
52+
if (line.length == 0) {
6853
continue;
6954
}
70-
71-
const tags: string[] = [];
72-
if (charData.WSpace === 'Y') {
73-
tags.push('Whitespace');
74-
}
75-
if (charData.Emoji === 'Y') {
76-
tags.push('Emoji');
77-
}
78-
if (charData.Dep === 'Y') {
79-
tags.push('Deprecated');
80-
}
81-
if (charData.QMark === 'Y') {
82-
tags.push('Quote');
83-
}
84-
if (charData.Dash === 'Y') {
85-
tags.push('Dash');
55+
if (line.startsWith('#')) {
56+
// comment line
57+
if (line.startsWith('# group: ')) {
58+
currentGroup = line.replace('# group: ', '').trim();
59+
} else if (line.startsWith('# subgroup: ')) {
60+
currentSubgroup = line.replace('# subgroup: ', '').trim();
61+
}
62+
continue;
8663
}
8764

88-
var name = charData.na || charData.na1;
89-
if (!name && charData['name-alias']) {
90-
name = charData['name-alias'][0].alias;
65+
const match = line.match(/^([0-9A-F ]+)\s*;\s*(fully-qualified|minimally-qualified|unqualified|component)\s*# (.+) E([0-9]+[.][0-9]+) (.+)$/);
66+
if (match) {
67+
data.push( {
68+
codepoints: match[1].trim(),
69+
qualification: match[2].trim(),
70+
emoji: match[3].trim(),
71+
description: match[5].trim(),
72+
version: match[4].trim(),
73+
group: currentGroup,
74+
subgroup: currentSubgroup,
75+
});
76+
} else {
77+
console.log(`DEBUG: no match for line: ${line}`);
9178
}
92-
93-
entries.push({
94-
code: charData.cp,
95-
name: name || "(no name)",
96-
age: charData.age,
97-
block: charData.blk.replaceAll('_', ' '),
98-
category: charData.gc,
99-
script: charData.sc,
100-
tags: tags.length ? tags : undefined,
101-
});
10279
}
10380

10481
const output: SearchData = {
10582
success: true,
106-
data: entries,
83+
lastmod: new Date().toISOString(),
84+
data,
10785
};
10886

10987
// Write the JSON data to a file
110-
console.log(`INFO: writing JSON data to ${jsonPath}`);
88+
console.log(`INFO: writing ${data.length} emoji data to ${jsonPath}`);
11189
await fs.writeFile(jsonPath, JSON.stringify(output, null, 2), 'utf-8');
11290
console.log(`INFO: wrote JSON data to ${jsonPath}`);
11391
}

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"author": "[email protected]",
3-
"description": "UnicodeSearch",
3+
"description": "EmojiSearch",
44
"devDependencies": {
55
"@types/node": "^24.3.1",
66
"@types/tabulator-tables": "^6.2.11",

public/favicon.ico

-65.2 KB
Binary file not shown.

0 commit comments

Comments
 (0)