-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathimport-hn.ts
128 lines (112 loc) · 3.14 KB
/
import-hn.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import assert from 'assert'
import * as fs from 'fs'
import zlib from 'zlib'
import * as cliProgress from 'cli-progress'
import { glob } from 'glob'
import TurndownService from 'turndown'
import { db } from '#app/database/db.ts'
import { createPost } from '#app/modules/posts/post-service.ts'
// Shared HTML-to-Markdown converter used for every imported item.
// `emDelimiter: '*'` selects `*` as the delimiter for emphasis.
const turndownOptions = { emDelimiter: '*' } as const
const turndownService = new TurndownService(turndownOptions)
/**
 * Command-line entry point: expands the glob patterns passed as CLI arguments
 * and imports every matching file.
 *
 * Files are imported one after another, and the returned promise settles only
 * once all of them have been processed.
 *
 * @throws AssertionError if no pattern arguments were supplied.
 */
export async function importHN(): Promise<void> {
  const patterns = process.argv.slice(2)
  // `slice()` always returns an array (which is truthy even when empty), so
  // the length has to be checked explicitly.
  assert(patterns.length > 0, 'Missing filename arguments')

  const files = await glob(patterns, {})
  // `forEach` would not await the async callback: imports would run
  // concurrently (each drawing its own progress bar) and this function would
  // return before any of them finished.
  for (const file of files) {
    await importHNPostsFromFile(file)
  }
}
/**
 * Imports every item of one JSON Lines file (optionally gzipped) as a post.
 *
 * For each item, in file order:
 *  - a `User` row with id `hn:<by>` is inserted unless it already exists,
 *  - the item's HTML `text` is converted to Markdown, prefixed with a heading
 *    built from `title` (linked to `url` when present),
 *  - a post is created under the already-imported parent, if the item has one.
 *
 * Parents must appear in the file before their children; an item whose parent
 * has not been imported yet aborts the import of this file.
 *
 * Failures are logged with `console.error` and do not reject, so the caller
 * can carry on with the next file.
 *
 * @param filename Path of the `.jsonl` / `.jsonl.gz` file to import.
 */
async function importHNPostsFromFile(filename: string) {
  try {
    const items = await readJsonLinesFromFile(filename)
    const bar1 = new cliProgress.SingleBar(
      {},
      cliProgress.Presets.shades_classic,
    )
    console.log(filename, items.length)
    bar1.start(items.length, 0)

    try {
      // Maps an item's id in the source file to the id of the post created for it.
      const idMap = new Map<string, number>()
      let i = 0
      for (const item of items) {
        let parentId: number | null = null
        // `!= null` also covers a missing `parent` field; 0 is treated as
        // "no parent" as well.
        if (item.parent != null && item.parent !== 0) {
          // `??` rather than `||` so that a legitimate post id of 0 is kept.
          parentId = idMap.get(item.parent) ?? null
          if (parentId == null) {
            // The second argument of `Error` is an options bag, not message
            // data, so the id has to be part of the message itself.
            throw new Error(`Parent id not found: ${item.parent}`)
          }
        }

        const by = item.by
        const ourUserId = 'hn:' + by
        await db
          .insertInto('User')
          .values({
            id: ourUserId,
            username: by,
            email: 'hn-user' + by + '@test.com',
            isAdmin: 0,
          })
          .onConflict(oc => oc.column('id').doNothing())
          .execute()

        // Items without body text (e.g. link-only stories) would make
        // `turndown()` throw a TypeError, so fall back to an empty string.
        const htmlString: string = item.text ?? ''
        let markdown = turndownService.turndown(htmlString)
        if (item.title) {
          markdown = item.url
            ? `# [${item.title}](${item.url})\n\n${markdown}`
            : `# ${item.title}\n\n${markdown}`
        }

        const postId = await db.transaction().execute(trx =>
          createPost(trx, parentId, markdown, ourUserId, {
            isPrivate: false,
            withUpvote: true,
          }),
        )
        idMap.set(item.id, postId)
        bar1.update(++i)
      }
    } finally {
      // Always release the terminal, even when an item fails mid-import.
      bar1.stop()
    }
  } catch (error) {
    console.error('Error:', error)
  }
}
/**
 * Reads a JSON Lines file and returns the parsed value of every non-blank line.
 *
 * Paths ending in `.gz` are transparently gunzipped. Content is decoded as
 * UTF-8.
 *
 * @param filePath Path of the file to read.
 * @returns The parsed lines, in file order.
 * @throws Rejects with the underlying stream error if the file cannot be read
 *   or decompressed, or with `Error('Invalid JSON in file: …')` for the first
 *   line that is not valid JSON.
 */
async function readJsonLinesFromFile(filePath: string): Promise<any[]> {
  return await new Promise((resolve, reject) => {
    const jsonObjects: any[] = []
    let partialLine = ''
    let failed = false

    const fileStream = fs.createReadStream(filePath)
    // Only allocate a gunzip stream when it is actually needed.
    const gunzip = filePath.endsWith('.gz') ? zlib.createGunzip() : null

    // Rejects once and tears the streams down so no further work is done.
    const fail = (error: unknown) => {
      if (failed) return
      failed = true
      fileStream.destroy()
      gunzip?.destroy()
      reject(error)
    }

    const parseLine = (line: string) => {
      if (failed || !line.trim()) return
      try {
        jsonObjects.push(JSON.parse(line))
      } catch {
        fail(new Error(`Invalid JSON in file: ${line}`))
      }
    }

    const onData = (chunk: string) => {
      const lines = (partialLine + chunk).split('\n')
      // The last element is whatever follows the final newline: an incomplete
      // line that must wait for the next chunk (or for the end of the stream).
      partialLine = lines.pop() ?? ''
      for (const line of lines) {
        parseLine(line)
      }
    }

    const onEnd = () => {
      parseLine(partialLine)
      if (!failed) resolve(jsonObjects)
    }

    if (gunzip) {
      // `pipe()` does not forward errors from the source stream, so a missing
      // or unreadable file must be handled on the file stream itself.
      fileStream.on('error', fail)
      // Decode on the stream so that multi-byte characters split across chunk
      // boundaries are reassembled instead of being corrupted by stringifying
      // each Buffer chunk separately.
      gunzip.setEncoding('utf-8')
      gunzip.on('data', onData).on('end', onEnd).on('error', fail)
      fileStream.pipe(gunzip)
    } else {
      fileStream.setEncoding('utf-8')
      fileStream.on('data', onData).on('end', onEnd).on('error', fail)
    }
  })
}
// Run the import when this module is executed. The returned promise is handled
// explicitly so a failure is reported and reflected in the process exit code
// instead of surfacing as an unhandled rejection.
importHN().catch(error => {
  console.error('Error:', error)
  process.exitCode = 1
})