Skip to content

Commit c39c039

Browse files
committed
Add gemoji alias/tag data
1 parent bf0e2c7 commit c39c039

File tree

5 files changed

+162
-5
lines changed

5 files changed

+162
-5
lines changed

.github/workflows/ghpages-deploy.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,11 @@ jobs:
3636
./bin/ucd_download.sh
3737
./bin/ucd_to_json.mts
3838
39+
- name: Add gemoji data to JSON
40+
run: |
41+
./bin/gemoji_download.sh
42+
./bin/gemoji_add.mts
43+
3944
- name: Set Status Info
4045
run: |
4146
./bin/set_status.sh

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ See it in action: [emojisearch.org](https://www.emojisearch.org/)
2525
[![VectorLogoZone](https://www.vectorlogo.zone/logos/vectorlogozone/vectorlogozone-ar21.svg)](https://www.vectorlogo.zone/ "Logos")
2626
[![Vite](https://www.vectorlogo.zone/logos/vitejsdev/vitejsdev-ar21.svg)](https://vitejs.dev/ "Bundler")
2727

28+
* [Gemoji](https://github.com/github/gemoji)
29+
2830
## To Do
2931

3032
- [ ] images

bin/gemoji_add.mts

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
#!/usr/bin/env node
2+
3+
import fs from 'fs/promises';
4+
import path from 'path';
5+
import { XMLParser } from 'fast-xml-parser';
6+
import { fileURLToPath } from "url";
7+
8+
const __filename = fileURLToPath(import.meta.url);
9+
const __dirname = path.dirname(__filename);
10+
11+
/** One emoji row of `public/emoji.json`. */
type SearchEntry = {
  codepoints: string;
  qualification: string;
  version: string;
  emoji: string;
  description: string;
  group: string;
  subgroup: string;
  /** Gemoji tags and aliases merged in by this script; left untouched when none are found. */
  keywords?: string[];
};

/** Top-level shape of `public/emoji.json` as read and rewritten by this script. */
type SearchData = {
  success: boolean;
  lastmod: string;
  data: SearchEntry[];
};

/** One element of the gemoji database array stored in `tmp/gemoji.json`. */
type GemojiEntry = {
  emoji: string;
  description: string;
  aliases: string[];
  tags: string[];
};

/**
 * Returns `emoji` with every U+FE0F (variation selector-16) removed, so that
 * spellings of the same emoji with and without the selector share one key.
 */
function emojiLookupKey(emoji: string): string {
  return emoji.replace(/\uFE0F/g, '');
}

/**
 * Combines the tags and aliases of a gemoji entry into a de-duplicated list,
 * tags first. Missing or non-array fields are treated as empty because the
 * entry comes from unvalidated JSON.
 */
function mergeKeywords(entry: GemojiEntry): string[] {
  const tags = Array.isArray(entry.tags) ? entry.tags : [];
  const aliases = Array.isArray(entry.aliases) ? entry.aliases : [];
  return Array.from(new Set<string>([...tags, ...aliases]));
}

/**
 * Sets `keywords` on every row that has a gemoji entry with at least one tag
 * or alias. Rows are matched on the exact emoji string first and, failing
 * that, on the string with variation selectors stripped.
 *
 * @param rows   emoji rows to update in place
 * @param gemoji entries of the gemoji database
 * @returns the number of rows whose `keywords` were set
 */
function addGemojiKeywords(rows: SearchEntry[], gemoji: GemojiEntry[]): number {
  const exactIndex = new Map<string, GemojiEntry>();
  const looseIndex = new Map<string, GemojiEntry>();
  for (const entry of gemoji) {
    if (typeof entry?.emoji !== 'string') {
      continue; // malformed entry: nothing to key it on
    }
    exactIndex.set(entry.emoji, entry);
    looseIndex.set(emojiLookupKey(entry.emoji), entry);
  }

  let updated = 0;
  for (const row of rows) {
    const gemojiEntry =
      exactIndex.get(row.emoji) ?? looseIndex.get(emojiLookupKey(row.emoji));
    if (!gemojiEntry) {
      console.log(`WARN: no gemoji entry found for emoji ${row.emoji} (${row.description})`);
      continue;
    }

    const keywords = mergeKeywords(gemojiEntry);
    if (keywords.length === 0) {
      continue;
    }
    row.keywords = keywords;
    updated += 1;
  }
  return updated;
}

/**
 * Reads and parses a JSON file. Any read or parse failure is reported on
 * stderr and terminates the process with exit code 1.
 *
 * @param filePath file to read
 * @param label    short name of the data set, used in log messages
 */
async function loadJson(filePath: string, label: string): Promise<unknown> {
  console.log(`INFO: reading file from ${filePath}`);
  try {
    const raw = await fs.readFile(filePath, 'utf-8');
    console.log(`INFO: parsing ${label} data`);
    return JSON.parse(raw);
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    console.error(`ERROR: unable to load ${label} data from ${filePath}: ${reason}`);
    process.exit(1);
  }
}

/**
 * Merges gemoji tags and aliases from `tmp/gemoji.json` into the `keywords`
 * field of the rows in `public/emoji.json` and writes the result back to
 * `public/emoji.json`. Exits with code 1 when either input cannot be loaded
 * or does not have the expected array shape.
 */
async function main(): Promise<void> {
  console.log(`INFO: starting at ${new Date().toISOString()}`);

  const gemojiPath = path.join(__dirname, '..', 'tmp', 'gemoji.json');
  const jsonPath = path.join(__dirname, '..', 'public', 'emoji.json');

  const gemoji = await loadJson(gemojiPath, 'gemoji');
  if (!Array.isArray(gemoji)) {
    console.error(`ERROR: gemoji file ${gemojiPath} does not contain a JSON array`);
    process.exit(1);
  }

  const data = (await loadJson(jsonPath, 'emoji')) as SearchData | null;
  if (data === null || typeof data !== 'object' || !Array.isArray(data.data)) {
    console.error(`ERROR: data file ${jsonPath} does not contain a "data" array`);
    process.exit(1);
  }

  const updated = addGemojiKeywords(data.data, gemoji as GemojiEntry[]);
  console.log(`INFO: added keywords to ${updated} of ${data.data.length} emoji`);

  // save the updated json data
  console.log(`INFO: writing emoji data to ${jsonPath}`);
  await fs.writeFile(jsonPath, JSON.stringify(data, null, 2), 'utf-8');
  console.log(`INFO: wrote JSON data to ${jsonPath}`);
}
92+
93+
94+
95+
// Script entry point: run main, then report completion. A rejection is logged
// and turned into a non-zero exit code instead of being left unhandled.
main()
  .then(() => {
    console.log(`INFO: complete at ${new Date().toISOString()}`);
  })
  .catch((err: unknown) => {
    console.error(`ERROR: failed at ${new Date().toISOString()}:`, err);
    process.exitCode = 1;
  });

bin/gemoji_download.sh

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
#!/usr/bin/env bash
#
# Download gemoji data from GitHub
#
# Fetches db/emoji.json from the github/gemoji repository and stores it as
# tmp/gemoji.json under the parent directory of this script, creating the
# tmp directory when it does not exist yet.
#

set -o nounset
set -o errexit
set -o pipefail

SCRIPT_HOME="$( cd "$( dirname "$0" )" && pwd )"
BASE_DIR=$(realpath "${SCRIPT_HOME}/..")

echo "INFO: starting gemoji download at $(date -u +"%Y-%m-%dT%H:%M:%SZ")"

TMP_DIR="${BASE_DIR}/tmp"
if [ ! -d "${TMP_DIR}" ]; then
    echo "INFO: creating temp dir ${TMP_DIR}"
    mkdir -p "${TMP_DIR}"
else
    echo "INFO: using existing temp dir ${TMP_DIR}"
fi

GEMOJI_URL="https://raw.githubusercontent.com/github/gemoji/refs/heads/master/db/emoji.json"
GEMOJI_FILE="${TMP_DIR}/gemoji.json"
PARTIAL_FILE="${GEMOJI_FILE}.partial"

# Remove a leftover partial download on any exit; after a successful rename
# the file no longer exists and rm -f is a no-op.
trap 'rm -f "${PARTIAL_FILE}"' EXIT

# --fail makes curl exit non-zero on an HTTP error response instead of saving
# the error body, so errexit stops the script before gemoji.json is replaced.
curl \
    --fail \
    --location \
    --output "${PARTIAL_FILE}" \
    --show-error \
    --silent \
    "${GEMOJI_URL}"

# Only replace the final file once the download has completed successfully.
mv "${PARTIAL_FILE}" "${GEMOJI_FILE}"

echo "INFO: completed gemoji download at $(date -u +"%Y-%m-%dT%H:%M:%SZ")"

src/index.ts

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ type SearchEntry = {
2626
version: string;
2727
group: string;
2828
subgroup: string;
29+
keywords?: string[];
2930
};
3031

3132
type SearchData = {
@@ -38,6 +39,7 @@ type EmojiData = {
3839
emoji: string;
3940
description: string;
4041
tags: string[];
42+
keywords?: string[];
4143
}
4244

4345
const dataUrl = "/emoji.json";
@@ -50,12 +52,17 @@ function filterDescription(
5052
) {
5153
if (!headerValue) return true;
5254

53-
const rowValue = rowData.description as string;
55+
const rowValues = [rowData.description, ...(rowData.keywords || [])];
5456

5557
if (headerValue.length == 1 && headerValue != "^" && headerValue != "/") {
5658
// single character, do starts with
5759
const search = headerValue.toLowerCase();
58-
return rowValue.toLowerCase().startsWith(search);
60+
for (const rowValue of rowValues) {
61+
if (rowValue.toLowerCase().startsWith(search)) {
62+
return true;
63+
}
64+
}
65+
return false;
5966
}
6067

6168
if (headerValue.startsWith("^")) {
@@ -64,15 +71,25 @@ function filterDescription(
6471
return true;
6572
}
6673
const search = headerValue.substring(1).toLowerCase();
67-
return rowValue.toLowerCase().startsWith(search);
74+
for (const rowValue of rowValues) {
75+
if (rowValue.toLowerCase().startsWith(search)) {
76+
return true;
77+
}
78+
}
79+
return false;
6880
}
6981

7082
if (headerValue.startsWith("/") && headerValue.endsWith("/")) {
7183
// regex
7284
const pattern = headerValue.substring(1, headerValue.length - 1);
7385
try {
7486
const re = new RegExp(pattern, "i");
75-
return re.test(rowValue);
87+
for (const rowValue of rowValues) {
88+
if (re.test(rowValue)) {
89+
return true;
90+
}
91+
}
92+
return false;
7693
} catch (e) {
7794
// bad regex
7895
return false;
@@ -81,7 +98,12 @@ function filterDescription(
8198

8299
// contains
83100
const search = headerValue.toLowerCase();
84-
return rowValue.toLowerCase().includes(search);
101+
for (const rowValue of rowValues) {
102+
if (rowValue.toLowerCase().includes(search)) {
103+
return true;
104+
}
105+
}
106+
return false;
85107
}
86108

87109
function filterTags(
@@ -235,6 +257,7 @@ async function main() {
235257
emoji: row.emoji,
236258
description: row.description,
237259
tags,
260+
keywords: row.keywords,
238261
} );
239262
}
240263

0 commit comments

Comments
 (0)