Skip to content

Commit bcb252d

Browse files
committed
fix(feeds): sanitize feed content to be utf8
Our Postgres DB expects UTF-8.
1 parent e1bec7f commit bcb252d

File tree

4 files changed

+98
-6
lines changed

4 files changed

+98
-6
lines changed

pkg/apps/feeds/cron.go

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -106,10 +106,10 @@ func DigestOptionToTime(lastDigest time.Time, interval string) time.Time {
106106
}
107107

108108
func getFeedItemID(logger *slog.Logger, item *gofeed.Item) string {
109-
guid := item.GUID
109+
guid := strings.ToValidUTF8(item.GUID, "")
110110
if item.GUID == "" {
111111
logger.Info("no <guid> found for feed item, using <link> instead for its unique id")
112-
return item.Link
112+
return strings.ToValidUTF8(item.Link, "")
113113
}
114114
return guid
115115
}
@@ -487,9 +487,9 @@ func (f *Fetcher) FetchAll(logger *slog.Logger, urls []string, inlineContent boo
487487
PostID: post.ID,
488488
GUID: uid,
489489
Data: db.FeedItemData{
490-
Title: item.Title,
491-
Description: item.Description,
492-
Content: item.Content,
490+
Title: strings.ToValidUTF8(item.Title, ""),
491+
Description: strings.ToValidUTF8(item.Description, ""),
492+
Content: strings.ToValidUTF8(item.Content, ""),
493493
Link: item.Link,
494494
PublishedAt: item.PublishedParsed,
495495
},

pkg/db/postgres/storage.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1561,7 +1561,7 @@ func (me *PsqlDB) InsertFeedItems(postID string, items []*db.FeedItem) error {
15611561
item.Data,
15621562
)
15631563
if err != nil {
1564-
return err
1564+
return fmt.Errorf("post id:%s, guid:%s, err:%w", item.PostID, item.GUID, err)
15651565
}
15661566
}
15671567

test.txt

Lines changed: 47 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,47 @@
1+
2+
=: digest_interval 1day
3+
=: inline_content false
4+
=> https://blog.pico.sh/rss
5+
=> https://uncenter.dev/feed.xml
6+
=> https://ryanccn.dev/feed/rss.xml
7+
=> https://shivjm.blog/feed.xml
8+
=> https://binyam.in/feeds/blog.xml
9+
=> https://tonsky.me/blog/atom.xml
10+
=> https://tty1.blog/feed/
11+
=> https://sindresorhus.com/rss.xml
12+
=> https://bower.sh/rss
13+
=> https://fasterthanli.me/index.xml
14+
=> https://antfu.me/feed.xml
15+
=> https://zackoverflow.dev/rss.xml
16+
=> https://humanwhocodes.com/feeds/blog.xml
17+
=> https://mitchellh.com/feed.xml
18+
=> https://chrisdone.com/rss.xml
19+
=> https://www.hoeser.dev/feed.xml
20+
=> https://daverupert.com/atom.xml
21+
=> https://blog.orhun.dev/rss.xml
22+
=> https://evanhahn.com/blog/index.xml
23+
=> https://www.11ty.dev/blog/feed.xml
24+
=> https://thorstenball.com/atom.xml
25+
=> https://registerspill.thorstenball.com/feed
26+
=> https://isabelroses.com/rss.xml
27+
=> https://boehs.org/in/blog.xml
28+
=> https://huonw.github.io/blog/atom.xml
29+
=> https://nixpkgs.news/rss.xml
30+
=> https://rbluethl.com/rss
31+
=> https://lucumr.pocoo.org/feed.atom
32+
=> https://yorickpeterse.com/feed.xml
33+
=> https://ardislu.dev/atom.xml
34+
=> https://kettanaito.com/blog/rss.xml
35+
=> https://kilo.bytesize.xyz/feed/
36+
=> https://bytesize.xyz/feed/
37+
=> https://zed.dev/blog.rss
38+
=> https://www.sophiajt.com/atom.xml
39+
=> https://robinmalfait.com/feed.xml
40+
=> https://twobithistory.org/feed.xml
41+
=> https://ruudvanasseldonk.com/feed.xml
42+
=> https://blog.rust-lang.org/feed.xml
43+
=> https://nolanlawson.com/feed/
44+
=> https://matklad.github.io/feed.xml
45+
=> https://www.ntietz.com/atom.xml
46+
=> https://2ality.com/feeds/posts.atom
47+
=> https://dbushell.com/rss.xml

test_utf8.txt

Lines changed: 45 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,45 @@
1+
2+
=: digest_interval 1day
3+
=: inline_content false
4+
=> https://blog.pico.sh/rss
5+
=> https://uncenter.dev/feed.xml
6+
=> https://ryanccn.dev/feed/rss.xml
7+
=> https://shivjm.blog/feed.xml
8+
=> https://binyam.in/feeds/blog.xml
9+
=> https://tonsky.me/blog/atom.xml
10+
=> https://tty1.blog/feed/
11+
=> https://sindresorhus.com/rss.xml
12+
=> https://bower.sh/rss
13+
=> https://fasterthanli.me/index.xml
14+
=> https://antfu.me/feed.xml
15+
=> https://zackoverflow.dev/rss.xml
16+
=> https://humanwhocodes.com/feeds/blog.xml
17+
=> https://mitchellh.com/feed.xml
18+
=> https://chrisdone.com/rss.xml
19+
=> https://www.hoeser.dev/feed.xml
20+
=> https://daverupert.com/atom.xml
21+
=> https://blog.orhun.dev/rss.xml
22+
=> https://evanhahn.com/blog/index.xml
23+
=> https://www.11ty.dev/blog/feed.xml
24+
=> https://thorstenball.com/atom.xml
25+
=> https://registerspill.thorstenball.com/feed
26+
=> https://isabelroses.com/rss.xml
27+
=> https://boehs.org/in/blog.xml
28+
=> https://huonw.github.io/blog/atom.xml
29+
=> https://nixpkgs.news/rss.xml
30+
=> https://rbluethl.com/rss
31+
=> https://lucumr.pocoo.org/feed.atom
32+
=> https://yorickpeterse.com/feed.xml
33+
=> https://ardislu.dev/atom.xml
34+
=> https://kettanaito.com/blog/rss.xml
35+
=> https://kilo.bytesize.xyz/feed/
36+
=> https://bytesize.xyz/feed/
37+
=> https://zed.dev/blog.rss
38+
=> https://www.sophiajt.com/atom.xml
39+
=> https://robinmalfait.com/feed.xml
40+
=> https://twobithistory.org/feed.xml
41+
=> https://ruudvanasseldonk.com/feed.xml
42+
=> https://blog.rust-lang.org/feed.xml
43+
=> https://nolanlawson.com/feed/
44+
=> https://matklad.github.io/feed.xml
45+
=> https://www.ntietz.com/atom.xml

0 commit comments

Comments
 (0)