Skip to content
This repository was archived by the owner on Jun 2, 2025. It is now read-only.

Commit 4d78fd9

Browse files
itohsnapnalabelle
authored andcommitted
Apply rules more selectively; add selection options
Fixes dewey#13 - all rules in the killfile were being applied to every feed that had any rule specified, regardless of whether a given rule applied to that specific feed. Also adds the ability to write Filter Expressions involving post authors, and to list Miniflux categories instead of a specific feed URL in killfiles. (cherry picked from commit 80f3c3a)
1 parent 86fa679 commit 4d78fd9

File tree

6 files changed

+154
-103
lines changed

6 files changed

+154
-103
lines changed

README.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ ignore-article "<feed>" "<filterexpr>"
2222

2323
This contains the URL of the feed that should be matched. It fuzzy matches the URL so if you only have one feed just use the base URL of the site. Example: `https://example.com` if the feed is on `https://example.com/rss/atom.xml`. A wildcard selector of `*` is also supported instead of the URL.
2424

25+
Alternatively, you can match every feed in one or more Miniflux categories: start the value with `category:` and follow it with a comma-separated list of category names. Example: `category:Photos`.
26+
2527
### `<filterexpr>` Filter Expressions
2628

2729
Of the [available rule set](https://newsboat.org/releases/2.15/docs/newsboat.html#_filter_language) and attributes (`Table 5. Available Attributes`), only a small subset is supported right now. That subset should already cover most use cases, though.
@@ -30,6 +32,7 @@ From the [available rule set](https://newsboat.org/releases/2.15/docs/newsboat.h
3032

3133
- `title`
3234
- `content`
35+
- `author`
3336

3437
**Comparison Operators**
3538

@@ -59,6 +62,12 @@ This one filters out all feed items that have the word `lunar` OR `moon` in ther
5962
ignore-article "https://xkcd.com/atom.xml" "title =~ (?i)(lunAR|MOON)"
6063
```
6164

65+
This one marks as read every feed item that does not contain an image, for all feeds assigned to the `Photos` category.
66+
```
67+
ignore-article "category:Photos" "content !~ (?i)(img src=)"
68+
69+
```
70+
6271
### Testing rules
6372

6473
There are tests in `filter/` that can be used to easily test rules or add new comparison operators.
@@ -67,6 +76,8 @@ There are tests in `filter/` that can be used to easily test rules or add new co
6776

6877
These are the environment variables that can be set. If you want to use a local file you can set `MF_KILLFILE_PATH="~/path/to/killfile"`. A local killfile always overrides a remote one, even if the remote killfile URL is set (`MF_KILLFILE_URL`). `MF_USERNAME`, `MF_PASSWORD` and `MF_API_ENDPOINT` are your Miniflux credentials. If `MF_REFRESH_INTERVAL` isn't set, the filter job runs at minute 30 of every hour (`0 30 * * * *`).
6978

79+
Note that `MF_KILLFILE_REFRESH_HOURS` is currently only supported for remote killfiles. You'll need to restart miniflux-sidekick to get it to recognize an updated local killfile.
80+
7081
```
7182
export MF_ENVIRONMENT=development
7283
export MF_PORT=8181

cmd/api/main.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ func main() {
103103
level.Error(l).Log("err", err)
104104
return
105105
}
106-
parsedRules, err := localRepo.FetchRules(*killfilePath)
106+
parsedRules, err := localRepo.FetchRules(*killfilePath, l)
107107
if err != nil {
108108
level.Error(l).Log("err", err)
109109
return
@@ -119,7 +119,7 @@ func main() {
119119
level.Error(l).Log("err", err)
120120
return
121121
}
122-
parsedRules, err := githubRepo.FetchRules(*killfileURL)
122+
parsedRules, err := githubRepo.FetchRules(*killfileURL, l)
123123
if err != nil {
124124
level.Error(l).Log("err", err)
125125
return
@@ -139,7 +139,7 @@ func main() {
139139
for {
140140
select {
141141
case <-ticker.C:
142-
if err := githubRepo.RefreshRules(*killfileURL); err != nil {
142+
if err := githubRepo.RefreshRules(*killfileURL, l); err != nil {
143143
level.Error(l).Log("err", err)
144144
}
145145
}

filter/service.go

Lines changed: 100 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -17,24 +17,25 @@ type Service interface {
1717
}
1818

1919
type service struct {
20-
rulesRepository rules.Repository
21-
client *miniflux.Client
22-
l log.Logger
20+
rulesRepository rules.Repository
21+
client *miniflux.Client
22+
l log.Logger
2323
}
2424

2525
// NewService initializes a new filter service
2626
func NewService(l log.Logger, c *miniflux.Client, rr rules.Repository) Service {
2727
return &service{
28-
rulesRepository: rr,
29-
client: c,
30-
l: l,
28+
rulesRepository: rr,
29+
client: c,
30+
l: l,
3131
}
3232
}
3333

3434
func (s *service) Run() {
3535
s.RunFilterJob(false)
3636
}
3737

38+
// NOTE (DIRTY HACK): this next var has also been defined in rules/rules.go. If the regex is updated here, it should be updated there as well
3839
var filterEntryRegex = regexp.MustCompile(`(\w+?) (\S+?) (.+)`)
3940

4041
func (s *service) RunFilterJob(simulation bool) {
@@ -44,9 +45,13 @@ func (s *service) RunFilterJob(simulation bool) {
4445
level.Error(s.l).Log("err", err)
4546
return
4647
}
48+
49+
feedLoop:
4750
for _, feed := range f {
4851
// Check if the feed matches one of our rules
4952
var found bool
53+
var entries *miniflux.EntryResultSet
54+
5055
for _, rule := range s.rulesRepository.Rules() {
5156
// Also support the wildcard selector
5257
if rule.URL == "*" {
@@ -55,100 +60,112 @@ func (s *service) RunFilterJob(simulation bool) {
5560
if strings.Contains(feed.FeedURL, rule.URL) {
5661
found = true
5762
}
58-
}
59-
if !found {
60-
continue
61-
}
62-
63-
// We then get all the unread entries of the feed that matches our rule
64-
entries, err := s.client.FeedEntries(feed.ID, &miniflux.Filter{
65-
Status: miniflux.EntryStatusUnread,
66-
})
67-
if err != nil {
68-
level.Error(s.l).Log("err", err)
69-
continue
70-
}
71-
72-
// We then check if the entry title matches a rule, if it matches we set it to "read" so we don't see it any more
73-
var matchedEntries []int64
74-
for _, entry := range entries.Entries {
75-
if s.evaluateRules(entry) {
76-
level.Info(s.l).Log("msg", "entry matches rules in the killfile", "entry_id", entry.ID, "feed_id", feed.ID)
77-
matchedEntries = append(matchedEntries, entry.ID)
63+
// Alt: Instead of a URL, specify "category:" followed by a comma-separated list of Miniflux categories to add a rule that affects every feed in those categories.
64+
if strings.EqualFold(rule.URL[0:9], "category:") {
65+
categoryTokens := strings.Split(rule.URL[9:], ",")
66+
for _, ct := range categoryTokens {
67+
if strings.EqualFold(feed.Category.Title, strings.TrimSpace(ct)) {
68+
found = true
69+
break
70+
}
71+
}
7872
}
79-
}
80-
if simulation {
81-
for _, me := range matchedEntries {
82-
e, err := s.client.Entry(me)
73+
if !found {
74+
continue
75+
}
76+
77+
if entries == nil {
78+
// Get all the unread entries of the feed that matches our rule. Only need to do this once per feed
79+
entries, err = s.client.FeedEntries(feed.ID, &miniflux.Filter{
80+
Status: miniflux.EntryStatusUnread,
81+
})
8382
if err != nil {
8483
level.Error(s.l).Log("err", err)
85-
return
84+
continue feedLoop // failure to load entries => move to next feed
8685
}
87-
level.Info(s.l).Log("msg", "would set status to read", "entry_id", me, "entry_title", e.Title)
8886
}
89-
} else {
90-
for _, me := range matchedEntries {
91-
level.Info(s.l).Log("msg", "set status to read", "entry_id", me)
92-
if err := s.client.UpdateEntries([]int64{me}, miniflux.EntryStatusRead); err != nil {
93-
level.Error(s.l).Log("msg", "error on updating the feed entries", "ids", me, "err", err)
94-
return
87+
88+
// We then check if the entry title matches a rule, if it matches we set it to "read" so we don't see it any more
89+
var matchedEntries []int64
90+
for _, entry := range entries.Entries {
91+
if s.evaluateRule(entry, rule) {
92+
level.Info(s.l).Log("msg", "entry matches rules in the killfile", "entry_id", entry.ID, "feed_id", feed.ID)
93+
matchedEntries = append(matchedEntries, entry.ID)
9594
}
9695
}
97-
}
98-
if len(matchedEntries) > 0 {
99-
level.Info(s.l).Log("msg", "marked all matched feed items as read", "affected", len(matchedEntries))
96+
97+
if simulation {
98+
for _, me := range matchedEntries {
99+
e, err := s.client.Entry(me)
100+
if err != nil {
101+
level.Error(s.l).Log("err", err)
102+
return
103+
}
104+
level.Info(s.l).Log("msg", "would set status to read", "entry_id", me, "entry_title", e.Title)
105+
}
106+
} else {
107+
for _, me := range matchedEntries {
108+
level.Info(s.l).Log("msg", "set status to read", "entry_id", me)
109+
if err := s.client.UpdateEntries([]int64{me}, miniflux.EntryStatusRead); err != nil {
110+
level.Error(s.l).Log("msg", "error on updating the feed entries", "ids", me, "err", err)
111+
return
112+
}
113+
}
114+
}
115+
if len(matchedEntries) > 0 {
116+
level.Info(s.l).Log("msg", "marked all matched feed items as read", "affected", len(matchedEntries))
117+
}
118+
100119
}
101120
}
102121
}
103122

104-
// evaluateRules checks a feed items against the available rules. It returns wheater this entry should be killed or not.
105-
func (s service) evaluateRules(entry *miniflux.Entry) bool {
123+
// evaluateRule checks a feed item against a particular rule. It returns whether this entry should be killed or not.
124+
func (s service) evaluateRule(entry *miniflux.Entry, rule rules.Rule) bool {
106125
var shouldKill bool
107-
for _, rule := range s.rulesRepository.Rules() {
108-
tokens := filterEntryRegex.FindStringSubmatch(rule.FilterExpression)
109-
if tokens == nil || len(tokens) != 4 {
110-
level.Error(s.l).Log("err", "invalid filter expression", "expression", rule.FilterExpression)
111-
continue
112-
}
113-
// We set the string we want to compare against (https://newsboat.org/releases/2.15/docs/newsboat.html#_filter_language are supported in the killfile format)
114-
var entryTarget string
115-
switch tokens[1] {
116-
case "title":
117-
entryTarget = entry.Title
118-
case "description":
119-
entryTarget = entry.Content
120-
case "author":
121-
entryTarget = entry.Author
122-
}
123126

124-
// We check what kind of comparator was given
125-
switch tokens[2] {
126-
case "=~", "!~":
127-
invertFilter := tokens[2][0] == '!'
127+
// The next line should succeed; we tested it would when we loaded our rules
128+
tokens := filterEntryRegex.FindStringSubmatch(rule.FilterExpression)
128129

129-
matched, err := regexp.MatchString(tokens[3], entryTarget)
130-
if err != nil {
131-
level.Error(s.l).Log("err", err)
132-
}
130+
// We set the string we want to compare against (https://newsboat.org/releases/2.15/docs/newsboat.html#_filter_language are supported in the killfile format)
131+
var entryTarget string
132+
switch tokens[1] {
133+
case "title":
134+
entryTarget = entry.Title
135+
case "content", "description":
136+
// include "description" for backwards compatibility with existing killfiles; nobody should be marking entries as read based on the feed's general description
137+
entryTarget = entry.Content
138+
case "author":
139+
entryTarget = entry.Author
140+
}
133141

134-
if matched && !invertFilter || !matched && invertFilter {
135-
shouldKill = true
136-
}
137-
case "#", "!#":
138-
invertFilter := tokens[2][0] == '!'
139-
140-
var containsTerm bool
141-
blacklistTokens := strings.Split(tokens[3], ",")
142-
for _, t := range blacklistTokens {
143-
if strings.Contains(entryTarget, t) {
144-
containsTerm = true
145-
break
146-
}
147-
}
148-
if containsTerm && !invertFilter || !containsTerm && invertFilter {
149-
shouldKill = true
142+
// We check what kind of comparator was given
143+
switch tokens[2] {
144+
case "=~", "!~":
145+
invertFilter := tokens[2][0] == '!'
146+
147+
matched, err := regexp.MatchString(tokens[3], entryTarget)
148+
if err != nil {
149+
level.Error(s.l).Log("err", err)
150+
}
151+
152+
if matched && !invertFilter || !matched && invertFilter {
153+
shouldKill = true
154+
}
155+
case "#", "!#":
156+
invertFilter := tokens[2][0] == '!'
157+
158+
var containsTerm bool
159+
blacklistTokens := strings.Split(tokens[3], ",")
160+
for _, t := range blacklistTokens {
161+
if strings.Contains(entryTarget, t) {
162+
containsTerm = true
163+
break
150164
}
151165
}
166+
if containsTerm && !invertFilter || !containsTerm && invertFilter {
167+
shouldKill = true
168+
}
152169
}
153170
return shouldKill
154171
}

rules/github_repository.go

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@ import (
44
"bufio"
55
"net/http"
66
"sync"
7+
8+
"github.com/go-kit/kit/log"
9+
"github.com/go-kit/kit/log/level"
710
)
811

912
type githubRepository struct {
@@ -28,7 +31,7 @@ func (r *githubRepository) Rules() []Rule {
2831
}
2932

3033
// FetchRules parses a remote killfile to get all rules
31-
func (r *githubRepository) FetchRules(location string) ([]Rule, error) {
34+
func (r *githubRepository) FetchRules(location string, l log.Logger) ([]Rule, error) {
3235
resp, err := r.c.Get(location)
3336
if err != nil {
3437
return nil, err
@@ -39,20 +42,26 @@ func (r *githubRepository) FetchRules(location string) ([]Rule, error) {
3942
for scanner.Scan() {
4043
matches := reRuleSplitter.FindStringSubmatch(scanner.Text())
4144
if len(matches) == 4 {
42-
rules = append(rules, Rule{
43-
Command: matches[1],
44-
URL: matches[2],
45-
FilterExpression: matches[3],
46-
})
45+
// Verify that matches[3] (soon to be FilterExpression) is legit before we save the rule
46+
tokens := filterEntryRegex.FindStringSubmatch(matches[3])
47+
if tokens == nil || len(tokens) != 4 {
48+
level.Error(l).Log("err", "invalid filter expression", "expression", matches[3])
49+
} else {
50+
rules = append(rules, Rule{
51+
Command: matches[1],
52+
URL: matches[2],
53+
FilterExpression: matches[3],
54+
})
55+
}
4756
}
4857
}
4958

5059
return rules, scanner.Err()
5160
}
5261

5362
// RefreshRules fetches the new rules and updates the local cache
54-
func (r *githubRepository) RefreshRules(location string) error {
55-
rules, err := r.FetchRules(location)
63+
func (r *githubRepository) RefreshRules(location string, l log.Logger) error {
64+
rules, err := r.FetchRules(location, l)
5665
if err != nil {
5766
return err
5867
}

0 commit comments

Comments
 (0)