Skip to content

Commit

Permalink
Apply rules more selectively; add selection options
Browse files Browse the repository at this point in the history
Fixes dewey#13 - all rules in the killfile were being applied to all feeds that had any rule specified, regardless of whether a given rule actually applied to that specific feed.

Also adds the ability to set Filter Expressions involving post authors, and to list Miniflux categories instead of a specific feed URL, in killfiles.
  • Loading branch information
itohsnap committed Feb 27, 2023
1 parent 5fe77bd commit 80f3c3a
Show file tree
Hide file tree
Showing 6 changed files with 148 additions and 95 deletions.
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ ignore-article "<feed>" "<filterexpr>"

This contains the URL of the feed that should be matched. It fuzzy matches the URL so if you only have one feed just use the base URL of the site. Example: `https://example.com` if the feed is on `https://example.com/rss/atom.xml`. A wildcard selector of `*` is also supported instead of the URL.

Alternately, you may specify a comma-separated list of categories whose feeds should be matched by starting the value with `category:`. Example: `category:Photos`.

### `<filterexpr>` Filter Expressions

From the [available rule set](https://newsboat.org/releases/2.15/docs/newsboat.html#_filter_language) and attributes (`Table 5. Available Attributes`) only a small subset are supported right now. These should cover most use cases already though.
Expand All @@ -30,6 +32,7 @@ From the [available rule set](https://newsboat.org/releases/2.15/docs/newsboat.h

- `title`
- `content`
- `author`

**Comparison Operators**

Expand Down Expand Up @@ -59,6 +62,12 @@ This one filters out all feed items that have the word `lunar` OR `moon` in their title.
ignore-article "https://xkcd.com/atom.xml" "title =~ (?i)(lunAR|MOON)"
```

This one marks as read all feed items without an image in feeds assigned a category of `Photos`.
```
ignore-article "category:Photos" "content !~ (?i)(img src=)"
```

### Testing rules

There are tests in `filter/` that can be used to easily test rules or add new comparison operators.
Expand All @@ -67,6 +76,8 @@ There are tests in `filter/` that can be used to easily test rules or add new co

These are the environment variables that can be set. If you want to use a local file you can set `MF_KILLFILE_PATH="~/path/to/killfile"`. A local killfile always overwrites a remote one, even if the remote killfile URL is set (`MF_KILLFILE_URL`). `MF_USERNAME`, `MF_PASSWORD` and `MF_API_ENDPOINT` are your Miniflux credentials. If `MF_REFRESH_INTERVAL` isn't set, it runs at 30 minutes past every hour (`0 30 * * * *`).

Note that `MF_KILLFILE_REFRESH_HOURS` is currently only supported for remote killfiles. You'll need to restart miniflux-sidekick to get it to recognize an updated local killfile.

```
export MF_ENVIRONMENT=development
export MF_PORT=8181
Expand Down
6 changes: 3 additions & 3 deletions cmd/api/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ func main() {
level.Error(l).Log("err", err)
return
}
parsedRules, err := localRepo.FetchRules(*killfilePath)
parsedRules, err := localRepo.FetchRules(*killfilePath, l)
if err != nil {
level.Error(l).Log("err", err)
return
Expand All @@ -119,7 +119,7 @@ func main() {
level.Error(l).Log("err", err)
return
}
parsedRules, err := githubRepo.FetchRules(*killfileURL)
parsedRules, err := githubRepo.FetchRules(*killfileURL, l)
if err != nil {
level.Error(l).Log("err", err)
return
Expand All @@ -139,7 +139,7 @@ func main() {
for {
select {
case <-ticker.C:
if err := githubRepo.RefreshRules(*killfileURL); err != nil {
if err := githubRepo.RefreshRules(*killfileURL, l); err != nil {
level.Error(l).Log("err", err)
}
}
Expand Down
169 changes: 94 additions & 75 deletions filter/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ func (s *service) Run() {
s.RunFilterJob(false)
}

// NOTE (DIRTY HACK): this next var has also been defined in rules/rules.go. If the regex is updated here, it should be updated there as well
var filterEntryRegex = regexp.MustCompile(`(\w+?) (\S+?) (.+)`)

func (s *service) RunFilterJob(simulation bool) {
Expand All @@ -44,9 +45,13 @@ func (s *service) RunFilterJob(simulation bool) {
level.Error(s.l).Log("err", err)
return
}

feedLoop:
for _, feed := range f {
// Check if the feed matches one of our rules
var found bool
var entries *miniflux.EntryResultSet

for _, rule := range s.rulesRepository.Rules() {
// Also support the wildcard selector
if rule.URL == "*" {
Expand All @@ -55,98 +60,112 @@ func (s *service) RunFilterJob(simulation bool) {
if strings.Contains(feed.FeedURL, rule.URL) {
found = true
}
}
if !found {
continue
}

// We then get all the unread entries of the feed that matches our rule
entries, err := s.client.FeedEntries(feed.ID, &miniflux.Filter{
Status: miniflux.EntryStatusUnread,
})
if err != nil {
level.Error(s.l).Log("err", err)
continue
}

// We then check if the entry title matches a rule, if it matches we set it to "read" so we don't see it any more
var matchedEntries []int64
for _, entry := range entries.Entries {
if s.evaluateRules(entry) {
level.Info(s.l).Log("msg", "entry matches rules in the killfile", "entry_id", entry.ID, "feed_id", feed.ID)
matchedEntries = append(matchedEntries, entry.ID)
// Alt: Instead of a URL, specify "category:" followed by a comma-separated list of Miniflux categories to add a rule that affects every feed in those categories.
if strings.EqualFold(rule.URL[0:9], "category:") {
categoryTokens := strings.Split(rule.URL[9:], ",")
for _, ct := range categoryTokens {
if strings.EqualFold(feed.Category.Title, strings.TrimSpace(ct)) {
found = true
break
}
}
}
}
if simulation {
for _, me := range matchedEntries {
e, err := s.client.Entry(me)
if !found {
continue
}

if entries == nil {
// Get all the unread entries of the feed that matches our rule. Only need to do this once per feed
entries, err = s.client.FeedEntries(feed.ID, &miniflux.Filter{
Status: miniflux.EntryStatusUnread,
})
if err != nil {
level.Error(s.l).Log("err", err)
return
continue feedLoop // failure to load entries => move to next feed
}
level.Info(s.l).Log("msg", "would set status to read", "entry_id", me, "entry_title", e.Title)
}
} else {
for _, me := range matchedEntries {
level.Info(s.l).Log("msg", "set status to read", "entry_id", me)
if err := s.client.UpdateEntries([]int64{me}, miniflux.EntryStatusRead); err != nil {
level.Error(s.l).Log("msg", "error on updating the feed entries", "ids", me, "err", err)
return

// We then check if the entry title matches a rule, if it matches we set it to "read" so we don't see it any more
var matchedEntries []int64
for _, entry := range entries.Entries {
if s.evaluateRule(entry, rule) {
level.Info(s.l).Log("msg", "entry matches rules in the killfile", "entry_id", entry.ID, "feed_id", feed.ID)
matchedEntries = append(matchedEntries, entry.ID)
}
}
}
if len(matchedEntries) > 0 {
level.Info(s.l).Log("msg", "marked all matched feed items as read", "affected", len(matchedEntries))

if simulation {
for _, me := range matchedEntries {
e, err := s.client.Entry(me)
if err != nil {
level.Error(s.l).Log("err", err)
return
}
level.Info(s.l).Log("msg", "would set status to read", "entry_id", me, "entry_title", e.Title)
}
} else {
for _, me := range matchedEntries {
level.Info(s.l).Log("msg", "set status to read", "entry_id", me)
if err := s.client.UpdateEntries([]int64{me}, miniflux.EntryStatusRead); err != nil {
level.Error(s.l).Log("msg", "error on updating the feed entries", "ids", me, "err", err)
return
}
}
}
if len(matchedEntries) > 0 {
level.Info(s.l).Log("msg", "marked all matched feed items as read", "affected", len(matchedEntries))
}

}
}
}

// evaluateRules checks a feed items against the available rules. It returns wheater this entry should be killed or not.
func (s service) evaluateRules(entry *miniflux.Entry) bool {
// evaluateRule checks a feed item against a particular rule. It returns whether this entry should be killed or not.
func (s service) evaluateRule(entry *miniflux.Entry, rule rules.Rule) bool {
var shouldKill bool
for _, rule := range s.rulesRepository.Rules() {
tokens := filterEntryRegex.FindStringSubmatch(rule.FilterExpression)
if tokens == nil || len(tokens) != 4 {
level.Error(s.l).Log("err", "invalid filter expression", "expression", rule.FilterExpression)
continue
}
// We set the string we want to compare against (https://newsboat.org/releases/2.15/docs/newsboat.html#_filter_language are supported in the killfile format)
var entryTarget string
switch tokens[1] {
case "title":
entryTarget = entry.Title
case "description":
entryTarget = entry.Content
}

// We check what kind of comparator was given
switch tokens[2] {
case "=~", "!~":
invertFilter := tokens[2][0] == '!'
// The next line should succeed; we tested it would when we loaded our rules
tokens := filterEntryRegex.FindStringSubmatch(rule.FilterExpression)

matched, err := regexp.MatchString(tokens[3], entryTarget)
if err != nil {
level.Error(s.l).Log("err", err)
}
// We set the string we want to compare against (https://newsboat.org/releases/2.15/docs/newsboat.html#_filter_language are supported in the killfile format)
var entryTarget string
switch tokens[1] {
case "title":
entryTarget = entry.Title
case "content", "description":
// include "description" for backwards compatibility with existing killfiles; nobody should be marking entries as read based on the feed's general description
entryTarget = entry.Content
case "author":
entryTarget = entry.Author
}

if matched && !invertFilter || !matched && invertFilter {
shouldKill = true
}
case "#", "!#":
invertFilter := tokens[2][0] == '!'

var containsTerm bool
blacklistTokens := strings.Split(tokens[3], ",")
for _, t := range blacklistTokens {
if strings.Contains(entryTarget, t) {
containsTerm = true
break
}
}
if containsTerm && !invertFilter || !containsTerm && invertFilter {
shouldKill = true
// We check what kind of comparator was given
switch tokens[2] {
case "=~", "!~":
invertFilter := tokens[2][0] == '!'

matched, err := regexp.MatchString(tokens[3], entryTarget)
if err != nil {
level.Error(s.l).Log("err", err)
}

if matched && !invertFilter || !matched && invertFilter {
shouldKill = true
}
case "#", "!#":
invertFilter := tokens[2][0] == '!'

var containsTerm bool
blacklistTokens := strings.Split(tokens[3], ",")
for _, t := range blacklistTokens {
if strings.Contains(entryTarget, t) {
containsTerm = true
break
}
}
if containsTerm && !invertFilter || !containsTerm && invertFilter {
shouldKill = true
}
}
return shouldKill
}
25 changes: 17 additions & 8 deletions rules/github_repository.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ import (
"bufio"
"net/http"
"sync"

"github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level"
)

type githubRepository struct {
Expand All @@ -28,7 +31,7 @@ func (r *githubRepository) Rules() []Rule {
}

// FetchRules parses a remote killfile to get all rules
func (r *githubRepository) FetchRules(location string) ([]Rule, error) {
func (r *githubRepository) FetchRules(location string, l log.Logger) ([]Rule, error) {
resp, err := r.c.Get(location)
if err != nil {
return nil, err
Expand All @@ -39,20 +42,26 @@ func (r *githubRepository) FetchRules(location string) ([]Rule, error) {
for scanner.Scan() {
matches := reRuleSplitter.FindStringSubmatch(scanner.Text())
if len(matches) == 4 {
rules = append(rules, Rule{
Command: matches[1],
URL: matches[2],
FilterExpression: matches[3],
})
// Verify that matches[3] (soon to be FilterExpression) is legit before we save the rule
tokens := filterEntryRegex.FindStringSubmatch(matches[3])
if tokens == nil || len(tokens) != 4 {
level.Error(l).Log("err", "invalid filter expression", "expression", matches[3])
} else {
rules = append(rules, Rule{
Command: matches[1],
URL: matches[2],
FilterExpression: matches[3],
})
}
}
}

return rules, scanner.Err()
}

// RefreshRules fetches the new rules and updates the local cache
func (r *githubRepository) RefreshRules(location string) error {
rules, err := r.FetchRules(location)
func (r *githubRepository) RefreshRules(location string, l log.Logger) error {
rules, err := r.FetchRules(location, l)
if err != nil {
return err
}
Expand Down
23 changes: 16 additions & 7 deletions rules/local_repository.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ import (
"bufio"
"os"
"sync"

"github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level"
)

type localRepository struct {
Expand All @@ -25,7 +28,7 @@ func (r *localRepository) Rules() []Rule {
}

// FetchRules parses a local killfile to get all rules
func (r *localRepository) FetchRules(location string) ([]Rule, error) {
func (r *localRepository) FetchRules(location string, l log.Logger) ([]Rule, error) {
file, err := os.Open(location)
if err != nil {
return nil, err
Expand All @@ -37,18 +40,24 @@ func (r *localRepository) FetchRules(location string) ([]Rule, error) {
for scanner.Scan() {
matches := reRuleSplitter.FindStringSubmatch(scanner.Text())
if len(matches) == 4 {
rules = append(rules, Rule{
Command: matches[1],
URL: matches[2],
FilterExpression: matches[3],
})
// Verify that matches[3] (soon to be FilterExpression) is legit before we save the rule
tokens := filterEntryRegex.FindStringSubmatch(matches[3])
if tokens == nil || len(tokens) != 4 {
level.Error(l).Log("err", "invalid filter expression", "expression", matches[3])
} else {
rules = append(rules, Rule{
Command: matches[1],
URL: matches[2],
FilterExpression: matches[3],
})
}
}
}
return rules, scanner.Err()
}

// RefreshRules for local repositories isn't implemented yet; it is a no-op
// that always returns nil. The location and logger parameters are unused and
// exist only so the signature matches the remote (GitHub) repository's
// RefreshRules — presumably both satisfy a shared repository interface
// (TODO confirm against the interface definition, not visible here).
// NOTE(review): as the README states, a changed local killfile therefore
// requires a restart of miniflux-sidekick to be picked up.
func (r *localRepository) RefreshRules(location string, l log.Logger) error {
	return nil
}

Expand Down
Loading

0 comments on commit 80f3c3a

Please sign in to comment.