diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index a0a3cc8f8..2f48e0510 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -31,7 +31,7 @@ jobs: uses: actions/setup-go@v6 if: ${{ env.NeedToSync }} with: - go-version-file: ./go.mod + go-version: '1.23' - name: Checkout v2fly/domain-list-community if: ${{ env.NeedToSync }} @@ -46,10 +46,60 @@ jobs: echo "include:geolocation-!cn @cn" >> ./domain-list-community/data/cn echo "include:geolocation-cn @!cn" >> ./domain-list-community/data/geolocation-\!cn - - name: Get dependencies and run + - name: Create config.json if: ${{ env.NeedToSync }} run: | - go run ./ --datapath=./domain-list-community/data + cat > config.json <<'EOF' + { + "input": [ + { + "type": "domainlist", + "action": "add", + "args": { + "dataDir": "./domain-list-community/data" + } + } + ], + "output": [ + { + "type": "v2rayGeoSite", + "action": "output", + "args": { + "outputDir": "./publish", + "outputName": "geosite.dat", + "excludeAttrs": "cn@!cn@ads,geolocation-cn@!cn@ads,geolocation-!cn@cn@ads", + "gfwlistOutput": "geolocation-!cn" + } + }, + { + "type": "text", + "action": "output", + "args": { + "outputDir": "./publish", + "wantedList": [ + "category-ads-all", + "tld-cn", + "cn", + "geolocation-cn", + "tld-!cn", + "geolocation-!cn", + "private", + "apple", + "icloud", + "google", + "steam" + ] + } + } + ] + } + EOF + + - name: Build and run + if: ${{ env.NeedToSync }} + run: | + go build -v -o domain-list-custom + ./domain-list-custom convert -c config.json - name: Generate sha256 hashsum if: ${{ env.NeedToSync }} diff --git a/.gitignore b/.gitignore index 66fd13c90..dd15510fd 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ *.dll *.so *.dylib +domain-list-custom # Test binary, built with `go test -c` *.test @@ -11,5 +12,9 @@ # Output of the go coverage tool, specifically when used with LiteIDE *.out +# Output directories +/output +/publish + # Dependency directories (remove the comment 
below to include it) # vendor/ diff --git a/README.md b/README.md index 0bdf19a1d..617045083 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,8 @@ 基于 [v2fly/domain-list-community#256](https://github.com/v2fly/domain-list-community/issues/256) 的提议,重构 [v2fly/domain-list-community](https://github.com/v2fly/domain-list-community) 的构建流程,并添加新功能。 +本项目采用插件化架构,参考 [Loyalsoldier/geoip](https://github.com/Loyalsoldier/geoip) 项目的设计模式,提供命令行界面(CLI)工具,支持灵活的配置文件,方便用户自定义域名列表的处理和输出。 + ## 与官方版 `dlc.dat` 不同之处 - 将 `dlc.dat` 重命名为 `geosite.dat` @@ -13,6 +15,81 @@ [https://github.com/Loyalsoldier/domain-list-custom/releases/latest/download/geosite.dat](https://github.com/Loyalsoldier/domain-list-custom/releases/latest/download/geosite.dat) +## 使用方法 + +### 命令行工具 + +```bash +# 查看帮助 +./domain-list-custom --help + +# 使用配置文件进行转换 +./domain-list-custom convert -c config.json + +# 使用远程配置文件 +./domain-list-custom convert -c https://example.com/config.json +``` + +### 配置文件 + +配置文件采用 JSON 格式,包含 `input` 和 `output` 两个部分: + +```json +{ + "input": [ + { + "type": "domainlist", + "action": "add", + "args": { + "dataDir": "./data" + } + } + ], + "output": [ + { + "type": "v2rayGeoSite", + "action": "output", + "args": { + "outputDir": "./output", + "outputName": "geosite.dat", + "excludeAttrs": "cn@!cn@ads,geolocation-cn@!cn@ads,geolocation-!cn@cn@ads", + "gfwlistOutput": "geolocation-!cn" + } + }, + { + "type": "text", + "action": "output", + "args": { + "outputDir": "./output", + "wantedList": ["cn", "google", "apple"] + } + } + ] +} +``` + +更多配置示例请参考 `config.example.json`。 + +## 项目结构 + +``` +. 
+├── lib/ # 核心库 +│ ├── lib.go # 接口定义 +│ ├── config.go # 配置解析 +│ ├── container.go # 数据容器 +│ ├── entry.go # 条目管理 +│ ├── instance.go # 实例管理 +│ └── common.go # 通用函数 +├── plugin/ # 插件目录 +│ ├── plaintext/ # 文本格式插件 +│ └── v2ray/ # V2Ray 格式插件 +├── main.go # 主程序入口 +├── convert.go # 转换命令 +├── init.go # 插件注册 +└── config.json # 配置文件 +``` + ## 使用本项目的项目 [@Loyalsoldier/v2ray-rules-dat](https://github.com/Loyalsoldier/v2ray-rules-dat) diff --git a/common.go b/common.go deleted file mode 100644 index dff2c13ec..000000000 --- a/common.go +++ /dev/null @@ -1,113 +0,0 @@ -package main - -import ( - "fmt" - "go/build" - "os" - "path/filepath" - "strings" -) - -type fileName string - -type attribute string - -// GetDataDir returns the path to the "data" directory used to generate lists. -// Usage order: -// 1. The datapath that user set when running the program -// 2. The default path "./data" (data directory in the current working directory) if exists -// 3. The path to the data directory of project `v2fly/domain-list-community` in GOPATH mode -func GetDataDir() string { - if *dataPath != "" { // Use dataPath option if set by user - fmt.Printf("Use domain list files in '%s' directory.\n", *dataPath) - return *dataPath - } - - defaultDataDir := filepath.Join("./", "data") - if _, err := os.Stat(defaultDataDir); !os.IsNotExist(err) { // Use "./data" directory if exists - fmt.Printf("Use domain list files in '%s' directory.\n", defaultDataDir) - return defaultDataDir - } - - return filepath.Join(GetGOPATH(), "src", "github.com", "v2fly", "domain-list-community", "data") -} - -// envFile returns the name of the Go environment configuration file. 
-// Copy from https://github.com/golang/go/blob/c4f2a9788a7be04daf931ac54382fbe2cb754938/src/cmd/go/internal/cfg/cfg.go#L150-L166 -func envFile() (string, error) { - if file := os.Getenv("GOENV"); file != "" { - if file == "off" { - return "", fmt.Errorf("GOENV=off") - } - return file, nil - } - dir, err := os.UserConfigDir() - if err != nil { - return "", err - } - if dir == "" { - return "", fmt.Errorf("missing user-config dir") - } - return filepath.Join(dir, "go", "env"), nil -} - -// GetRuntimeEnv returns the value of runtime environment variable, -// that is set by running following command: `go env -w key=value`. -func GetRuntimeEnv(key string) (string, error) { - file, err := envFile() - if err != nil { - return "", err - } - if file == "" { - return "", fmt.Errorf("missing runtime env file") - } - var data []byte - var runtimeEnv string - data, readErr := os.ReadFile(file) - if readErr != nil { - return "", readErr - } - envStrings := strings.Split(string(data), "\n") - for _, envItem := range envStrings { - envItem = strings.TrimSuffix(envItem, "\r") - envKeyValue := strings.Split(envItem, "=") - if strings.EqualFold(strings.TrimSpace(envKeyValue[0]), key) { - runtimeEnv = strings.TrimSpace(envKeyValue[1]) - } - } - return runtimeEnv, nil -} - -// GetGOPATH returns GOPATH environment variable as a string. It will NOT be empty. 
-func GetGOPATH() string { - // The one set by user explicitly by `export GOPATH=/path` or `env GOPATH=/path command` - GOPATH := os.Getenv("GOPATH") - if GOPATH == "" { - var err error - // The one set by user by running `go env -w GOPATH=/path` - GOPATH, err = GetRuntimeEnv("GOPATH") - if err != nil { - // The default one that Golang uses - return build.Default.GOPATH - } - if GOPATH == "" { - return build.Default.GOPATH - } - return GOPATH - } - return GOPATH -} - -// isEmpty checks if the rule that has been trimmed out spaces is empty -func isEmpty(s string) bool { - return len(strings.TrimSpace(s)) == 0 -} - -// removeComment removes comments in the rule -func removeComment(line string) string { - idx := strings.Index(line, "#") - if idx == -1 { - return line - } - return strings.TrimSpace(line[:idx]) -} diff --git a/config.example.json b/config.example.json new file mode 100644 index 000000000..a6797cef3 --- /dev/null +++ b/config.example.json @@ -0,0 +1,44 @@ +{ + "input": [ + { + "type": "domainlist", + "action": "add", + "args": { + "dataDir": "./data", + "wantedList": [] + } + } + ], + "output": [ + { + "type": "v2rayGeoSite", + "action": "output", + "args": { + "outputDir": "./output", + "outputName": "geosite.dat", + "excludeAttrs": "cn@!cn@ads,geolocation-cn@!cn@ads,geolocation-!cn@cn@ads", + "gfwlistOutput": "geolocation-!cn" + } + }, + { + "type": "text", + "action": "output", + "args": { + "outputDir": "./output", + "wantedList": [ + "category-ads-all", + "tld-cn", + "cn", + "geolocation-cn", + "tld-!cn", + "geolocation-!cn", + "private", + "apple", + "icloud", + "google", + "steam" + ] + } + } + ] +} diff --git a/config.json b/config.json new file mode 100644 index 000000000..addc271ce --- /dev/null +++ b/config.json @@ -0,0 +1,28 @@ +{ + "input": [ + { + "type": "domainlist", + "action": "add", + "args": { + "dataDir": "./data" + } + } + ], + "output": [ + { + "type": "v2rayGeoSite", + "action": "output", + "args": { + "outputDir": "./output", + 
"outputName": "geosite.dat" + } + }, + { + "type": "text", + "action": "output", + "args": { + "outputDir": "./output" + } + } + ] +} diff --git a/configuration.md b/configuration.md new file mode 100644 index 000000000..98be07d8e --- /dev/null +++ b/configuration.md @@ -0,0 +1,250 @@ +# Configuration Guide + +This document describes how to configure domain-list-custom using the JSON configuration file. + +## Configuration File Structure + +The configuration file consists of two main sections: `input` and `output`. + +```json +{ + "input": [...], + "output": [...] +} +``` + +## Input Configuration + +### Domain List Input + +Type: `domainlist` + +Load domain lists from a directory. + +```json +{ + "type": "domainlist", + "action": "add", + "args": { + "dataDir": "./data", + "wantedList": [] + } +} +``` + +**Arguments:** +- `dataDir` (required): Path to the directory containing domain list files +- `wantedList` (optional): Array of specific domain lists to load. If empty, all lists are loaded. + +**Domain List File Format:** + +``` +# Comments start with # +domain.com # Domain without prefix = domain type +full:exact.domain.com # Full match +keyword:ads # Keyword match +regexp:.*tracker.* # Regular expression match + +# Attributes +domain.com @ads # Domain with single attribute +domain.com @ads @cn # Domain with multiple attributes + +# Inclusions +include:other-list # Include all domains from other-list +include:other-list @cn # Include only domains with @cn attribute from other-list +``` + +## Output Configuration + +### V2Ray GeoSite Output + +Type: `v2rayGeoSite` + +Generate V2Ray geosite.dat file and optionally GFWList. + +```json +{ + "type": "v2rayGeoSite", + "action": "output", + "args": { + "outputDir": "./output", + "outputName": "geosite.dat", + "wantedList": [], + "excludedList": [], + "excludeAttrs": "cn@!cn@ads,geolocation-cn@!cn@ads", + "gfwlistOutput": "geolocation-!cn" + } +} +``` + +**Arguments:** +- `outputDir` (optional): Output directory path. 
Default: `./output` +- `outputName` (optional): Output filename. Default: `geosite.dat` +- `wantedList` (optional): Array of lists to include. If empty, all lists are included. +- `excludedList` (optional): Array of lists to exclude. +- `excludeAttrs` (optional): Rules to exclude domains with specific attributes from specific lists. Format: `list@attr1@attr2,list2@attr3` +- `gfwlistOutput` (optional): Name of the list to generate as GFWList format. + +**Exclude Attributes Format:** + +To exclude domains from `cn` list that have `!cn` or `ads` attributes: +``` +cn@!cn@ads +``` + +Multiple lists: +``` +cn@!cn@ads,geolocation-cn@!cn@ads,geolocation-!cn@cn@ads +``` + +### Text Output + +Type: `text` + +Generate plaintext domain list files. + +```json +{ + "type": "text", + "action": "output", + "args": { + "outputDir": "./output", + "wantedList": ["cn", "google", "apple"], + "excludedList": [] + } +} +``` + +**Arguments:** +- `outputDir` (optional): Output directory path. Default: `./output` +- `wantedList` (optional): Array of lists to export. If empty, all lists are exported. +- `excludedList` (optional): Array of lists to exclude. 
+ +**Output Format:** + +``` +domain:example.com +full:exact.domain.com +keyword:ads +regexp:.*tracker.* +domain:domain.com:@ads +domain:domain.com:@ads,@cn +``` + +## Complete Example + +```json +{ + "input": [ + { + "type": "domainlist", + "action": "add", + "args": { + "dataDir": "./data" + } + } + ], + "output": [ + { + "type": "v2rayGeoSite", + "action": "output", + "args": { + "outputDir": "./output", + "outputName": "geosite.dat", + "excludeAttrs": "cn@!cn@ads,geolocation-cn@!cn@ads,geolocation-!cn@cn@ads", + "gfwlistOutput": "geolocation-!cn" + } + }, + { + "type": "text", + "action": "output", + "args": { + "outputDir": "./output", + "wantedList": [ + "category-ads-all", + "cn", + "geolocation-cn", + "geolocation-!cn", + "google", + "apple" + ] + } + } + ] +} +``` + +## Usage + +```bash +# Use config.json in current directory +./domain-list-custom convert + +# Use specific config file +./domain-list-custom convert -c custom-config.json + +# Use remote config file +./domain-list-custom convert -c https://example.com/config.json + +# List available domain lists +./domain-list-custom list -c config.json +``` + +## Advanced Examples + +### Multiple Data Sources + +You can have multiple input configurations to load data from different sources: + +```json +{ + "input": [ + { + "type": "domainlist", + "action": "add", + "args": { + "dataDir": "./official-data" + } + }, + { + "type": "domainlist", + "action": "add", + "args": { + "dataDir": "./custom-data", + "wantedList": ["custom-list-1", "custom-list-2"] + } + } + ], + "output": [...] 
+} +``` + +### Selective Export + +Export only specific lists to different formats: + +```json +{ + "input": [...], + "output": [ + { + "type": "v2rayGeoSite", + "action": "output", + "args": { + "outputDir": "./output", + "outputName": "geosite-cn.dat", + "wantedList": ["cn", "geolocation-cn"] + } + }, + { + "type": "v2rayGeoSite", + "action": "output", + "args": { + "outputDir": "./output", + "outputName": "geosite-ads.dat", + "wantedList": ["category-ads-all"] + } + } + ] +} +``` diff --git a/convert.go b/convert.go new file mode 100644 index 000000000..fcb1d3081 --- /dev/null +++ b/convert.go @@ -0,0 +1,36 @@ +package main + +import ( + "log" + + "github.com/Loyalsoldier/domain-list-custom/lib" + "github.com/spf13/cobra" +) + +func init() { + rootCmd.AddCommand(convertCmd) + convertCmd.PersistentFlags().StringP("config", "c", "config.json", "URI of the JSON format config file, support both local file path and remote HTTP(S) URL") +} + +var convertCmd = &cobra.Command{ + Use: "convert", + Aliases: []string{"conv"}, + Short: "Convert domain list data from one format to another by using config file", + Run: func(cmd *cobra.Command, args []string) { + configFile, _ := cmd.Flags().GetString("config") + log.Println("Use config:", configFile) + + instance, err := lib.NewInstance() + if err != nil { + log.Fatal(err) + } + + if err := instance.InitConfig(configFile); err != nil { + log.Fatal(err) + } + + if err := instance.Run(); err != nil { + log.Fatal(err) + } + }, +} diff --git a/domain-list-custom b/domain-list-custom new file mode 100755 index 000000000..aac3b1d4e Binary files /dev/null and b/domain-list-custom differ diff --git a/go.mod b/go.mod index de54c9acb..0d4e6697e 100644 --- a/go.mod +++ b/go.mod @@ -1,10 +1,9 @@ module github.com/Loyalsoldier/domain-list-custom -go 1.21 - -toolchain go1.21.10 +go 1.23 require ( + github.com/spf13/cobra v1.8.1 github.com/v2fly/v2ray-core/v5 v5.16.1 google.golang.org/protobuf v1.34.2 ) @@ -12,5 +11,7 @@ require ( require ( 
github.com/adrg/xdg v0.4.0 // indirect github.com/golang/protobuf v1.5.4 // indirect + github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/spf13/pflag v1.0.5 // indirect golang.org/x/sys v0.19.0 // indirect ) diff --git a/go.sum b/go.sum index 626343059..9f053c0a5 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,6 @@ github.com/adrg/xdg v0.4.0 h1:RzRqFcjH4nE5C6oTAxhBtoE2IRyjBSa62SCbyPidvls= github.com/adrg/xdg v0.4.0/go.mod h1:N6ag73EX4wyxeaoeHctc1mas01KZgsj5tYiAIwqJE/E= +github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -7,8 +8,15 @@ github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= +github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM= +github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= 
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= diff --git a/init.go b/init.go new file mode 100644 index 000000000..22a713be4 --- /dev/null +++ b/init.go @@ -0,0 +1,6 @@ +package main + +import ( + _ "github.com/Loyalsoldier/domain-list-custom/plugin/plaintext" + _ "github.com/Loyalsoldier/domain-list-custom/plugin/v2ray" +) diff --git a/lib/common.go b/lib/common.go new file mode 100644 index 000000000..8099bd499 --- /dev/null +++ b/lib/common.go @@ -0,0 +1,19 @@ +package lib + +import ( + "strings" +) + +// IsEmpty checks if a string is empty after trimming spaces +func IsEmpty(s string) bool { + return len(strings.TrimSpace(s)) == 0 +} + +// RemoveComment removes comments from a line +func RemoveComment(line string) string { + idx := strings.Index(line, "#") + if idx == -1 { + return line + } + return strings.TrimSpace(line[:idx]) +} diff --git a/lib/config.go b/lib/config.go new file mode 100644 index 000000000..007f3bd71 --- /dev/null +++ b/lib/config.go @@ -0,0 +1,132 @@ +package lib + +import ( + "encoding/json" + "errors" + "fmt" + "strings" +) + +var ( + inputConfigCreatorCache = make(map[string]inputConfigCreator) + outputConfigCreatorCache = make(map[string]outputConfigCreator) + inputConverterCache = make(map[string]InputConverter) + outputConverterCache = make(map[string]OutputConverter) +) + +type inputConfigCreator func(Action, json.RawMessage) (InputConverter, error) + +type outputConfigCreator func(Action, json.RawMessage) (OutputConverter, error) + +func RegisterInputConfigCreator(id string, fn inputConfigCreator) error { + id = strings.ToLower(id) + if _, found := inputConfigCreatorCache[id]; found { + return errors.New("config creator has already been registered") + } + inputConfigCreatorCache[id] = fn + return nil +} + +func 
createInputConfig(id string, action Action, data json.RawMessage) (InputConverter, error) { + id = strings.ToLower(id) + fn, found := inputConfigCreatorCache[id] + if !found { + return nil, errors.New("unknown config type") + } + return fn(action, data) +} + +func RegisterOutputConfigCreator(id string, fn outputConfigCreator) error { + id = strings.ToLower(id) + if _, found := outputConfigCreatorCache[id]; found { + return errors.New("config creator has already been registered") + } + outputConfigCreatorCache[id] = fn + return nil +} + +func createOutputConfig(id string, action Action, data json.RawMessage) (OutputConverter, error) { + id = strings.ToLower(id) + fn, found := outputConfigCreatorCache[id] + if !found { + return nil, errors.New("unknown config type") + } + return fn(action, data) +} + +func RegisterInputConverter(id string, converter InputConverter) error { + id = strings.ToLower(id) + if _, found := inputConverterCache[id]; found { + return errors.New("converter has already been registered") + } + inputConverterCache[id] = converter + return nil +} + +func RegisterOutputConverter(id string, converter OutputConverter) error { + id = strings.ToLower(id) + if _, found := outputConverterCache[id]; found { + return errors.New("converter has already been registered") + } + outputConverterCache[id] = converter + return nil +} + +// Config is the configuration for converting domain lists +type Config struct { + Input []ConfigItem `json:"input"` + Output []ConfigItem `json:"output"` +} + +// ConfigItem is a single input or output configuration +type ConfigItem struct { + Type string `json:"type"` + Action string `json:"action"` + Args json.RawMessage `json:"args"` +} + +// UnmarshalJSON unmarshals a ConfigItem from JSON +func (c *ConfigItem) UnmarshalJSON(data []byte) error { + var tmp struct { + Type string `json:"type"` + Action string `json:"action"` + Args json.RawMessage `json:"args"` + } + + if err := json.Unmarshal(data, &tmp); err != nil { + return 
err + } + + tmp.Type = strings.TrimSpace(tmp.Type) + tmp.Action = strings.TrimSpace(tmp.Action) + + if tmp.Type == "" { + return fmt.Errorf("type is required") + } + if tmp.Action == "" { + return fmt.Errorf("action is required") + } + + action := Action(strings.ToLower(tmp.Action)) + if !ActionsRegistry[action] { + return fmt.Errorf("unknown action: %s", tmp.Action) + } + + c.Type = tmp.Type + c.Action = tmp.Action + c.Args = tmp.Args + + return nil +} + +// GetInputConverter returns an InputConverter for the ConfigItem +func (c *ConfigItem) GetInputConverter() (InputConverter, error) { + action := Action(strings.ToLower(c.Action)) + return createInputConfig(c.Type, action, c.Args) +} + +// GetOutputConverter returns an OutputConverter for the ConfigItem +func (c *ConfigItem) GetOutputConverter() (OutputConverter, error) { + action := Action(strings.ToLower(c.Action)) + return createOutputConfig(c.Type, action, c.Args) +} diff --git a/lib/container.go b/lib/container.go new file mode 100644 index 000000000..adc2c0698 --- /dev/null +++ b/lib/container.go @@ -0,0 +1,117 @@ +package lib + +import ( + "fmt" + "iter" + "strings" + "sync" +) + +// Container is a container for domain list entries +type Container interface { + Add(entry *Entry) error + Get(name string) (*Entry, bool) + GetEntry(name string) (*Entry, bool) + Has(name string) bool + Loop() iter.Seq[*Entry] + Len() int + GetNames() []string +} + +// SimpleContainer is a simple implementation of Container +type SimpleContainer struct { + entries map[string]*Entry + mu sync.RWMutex +} + +// NewSimpleContainer creates a new SimpleContainer +func NewSimpleContainer() *SimpleContainer { + return &SimpleContainer{ + entries: make(map[string]*Entry), + } +} + +// Add adds an entry to the container +func (c *SimpleContainer) Add(entry *Entry) error { + if entry == nil { + return fmt.Errorf("entry is nil") + } + + name := strings.ToUpper(strings.TrimSpace(entry.GetName())) + if name == "" { + return fmt.Errorf("entry 
name is empty") + } + + c.mu.Lock() + defer c.mu.Unlock() + + if existing, found := c.entries[name]; found { + // Merge domains + existing.AddDomains(entry.GetDomains()) + } else { + c.entries[name] = entry + } + + return nil +} + +// Get retrieves an entry by name +func (c *SimpleContainer) Get(name string) (*Entry, bool) { + return c.GetEntry(name) +} + +// GetEntry retrieves an entry by name +func (c *SimpleContainer) GetEntry(name string) (*Entry, bool) { + name = strings.ToUpper(strings.TrimSpace(name)) + + c.mu.RLock() + defer c.mu.RUnlock() + + entry, found := c.entries[name] + return entry, found +} + +// Has checks if an entry exists +func (c *SimpleContainer) Has(name string) bool { + name = strings.ToUpper(strings.TrimSpace(name)) + + c.mu.RLock() + defer c.mu.RUnlock() + + _, found := c.entries[name] + return found +} + +// Loop iterates over all entries +func (c *SimpleContainer) Loop() iter.Seq[*Entry] { + return func(yield func(*Entry) bool) { + c.mu.RLock() + defer c.mu.RUnlock() + + for _, entry := range c.entries { + if !yield(entry) { + return + } + } + } +} + +// Len returns the number of entries +func (c *SimpleContainer) Len() int { + c.mu.RLock() + defer c.mu.RUnlock() + + return len(c.entries) +} + +// GetNames returns all entry names +func (c *SimpleContainer) GetNames() []string { + c.mu.RLock() + defer c.mu.RUnlock() + + names := make([]string, 0, len(c.entries)) + for name := range c.entries { + names = append(names, name) + } + return names +} diff --git a/lib/entry.go b/lib/entry.go new file mode 100644 index 000000000..41a1124a6 --- /dev/null +++ b/lib/entry.go @@ -0,0 +1,81 @@ +package lib + +import ( + "strings" + + router "github.com/v2fly/v2ray-core/v5/app/router/routercommon" +) + +// Entry is a single domain list entry +type Entry struct { + Name string + Domains []*router.Domain +} + +// NewEntry creates a new Entry +func NewEntry(name string) *Entry { + name = strings.ToUpper(strings.TrimSpace(name)) + return &Entry{ + Name: 
name, + Domains: make([]*router.Domain, 0), + } +} + +// GetName returns the name of the entry +func (e *Entry) GetName() string { + return e.Name +} + +// AddDomain adds a domain to the entry +func (e *Entry) AddDomain(domain *router.Domain) { + if domain != nil { + e.Domains = append(e.Domains, domain) + } +} + +// AddDomains adds multiple domains to the entry +func (e *Entry) AddDomains(domains []*router.Domain) { + for _, domain := range domains { + e.AddDomain(domain) + } +} + +// GetDomains returns all domains in the entry +func (e *Entry) GetDomains() []*router.Domain { + return e.Domains +} + +// MarshalText converts the entry to text format +func (e *Entry) MarshalText() ([]byte, error) { + result := make([]byte, 0, 1024*512) + + for _, domain := range e.Domains { + ruleVal := strings.TrimSpace(domain.GetValue()) + if len(ruleVal) == 0 { + continue + } + + var ruleString string + switch domain.Type { + case router.Domain_Full: + ruleString = "full:" + ruleVal + case router.Domain_RootDomain: + ruleString = "domain:" + ruleVal + case router.Domain_Plain: + ruleString = "keyword:" + ruleVal + case router.Domain_Regex: + ruleString = "regexp:" + ruleVal + } + + if len(domain.Attribute) > 0 { + ruleString += ":" + for _, attr := range domain.Attribute { + ruleString += "@" + attr.GetKey() + "," + } + ruleString = strings.TrimRight(ruleString, ",") + } + result = append(result, []byte(ruleString+"\n")...) 
+ } + + return result, nil +} diff --git a/lib/instance.go b/lib/instance.go new file mode 100644 index 000000000..b61574024 --- /dev/null +++ b/lib/instance.go @@ -0,0 +1,119 @@ +package lib + +import ( + "encoding/json" + "fmt" + "io" + "log" + "net/http" + "os" + "strings" +) + +// Instance is the main instance for converting domain lists +type Instance struct { + Config *Config + Container Container +} + +// NewInstance creates a new Instance +func NewInstance() (*Instance, error) { + return &Instance{ + Container: NewSimpleContainer(), + }, nil +} + +// InitConfig initializes the instance with a config file +func (i *Instance) InitConfig(configFile string) error { + var configBytes []byte + var err error + + configFile = strings.TrimSpace(configFile) + if configFile == "" { + return fmt.Errorf("config file is required") + } + + // Check if it's a URL or local file + if strings.HasPrefix(strings.ToLower(configFile), "http://") || + strings.HasPrefix(strings.ToLower(configFile), "https://") { + // Download from URL + resp, err := http.Get(configFile) + if err != nil { + return fmt.Errorf("failed to download config: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("failed to download config: status code %d", resp.StatusCode) + } + + configBytes, err = io.ReadAll(resp.Body) + if err != nil { + return fmt.Errorf("failed to read config: %w", err) + } + } else { + // Read from local file + configBytes, err = os.ReadFile(configFile) + if err != nil { + return fmt.Errorf("failed to read config file: %w", err) + } + } + + // Parse config + var config Config + if err := json.Unmarshal(configBytes, &config); err != nil { + return fmt.Errorf("failed to parse config: %w", err) + } + + i.Config = &config + return nil +} + +// Run runs the conversion process +func (i *Instance) Run() error { + if i.Config == nil { + return fmt.Errorf("config is not initialized") + } + + // Process input + log.Println("Processing input...") 
+ for idx, inputConfig := range i.Config.Input { + log.Printf(" [%d/%d] type: %s, action: %s", idx+1, len(i.Config.Input), inputConfig.Type, inputConfig.Action) + + converter, err := inputConfig.GetInputConverter() + if err != nil { + return fmt.Errorf("failed to get input converter: %w", err) + } + + newContainer, err := converter.Input(i.Container) + if err != nil { + return fmt.Errorf("failed to process input [type: %s, action: %s]: %w", + inputConfig.Type, inputConfig.Action, err) + } + + if newContainer != nil { + i.Container = newContainer + } + } + + log.Printf("Processed %d entries\n", i.Container.Len()) + + // Process output + log.Println("Processing output...") + for idx, outputConfig := range i.Config.Output { + log.Printf(" [%d/%d] type: %s, action: %s", idx+1, len(i.Config.Output), outputConfig.Type, outputConfig.Action) + + converter, err := outputConfig.GetOutputConverter() + if err != nil { + return fmt.Errorf("failed to get output converter: %w", err) + } + + if err := converter.Output(i.Container); err != nil { + return fmt.Errorf("failed to process output [type: %s, action: %s]: %w", + outputConfig.Type, outputConfig.Action, err) + } + } + + log.Println("Done!") + return nil +} diff --git a/lib/lib.go b/lib/lib.go new file mode 100644 index 000000000..5d9e4ce07 --- /dev/null +++ b/lib/lib.go @@ -0,0 +1,46 @@ +package lib + +const ( + ActionAdd Action = "add" + ActionRemove Action = "remove" + ActionOutput Action = "output" + + CaseRemovePrefix CaseRemove = 0 + CaseRemoveEntry CaseRemove = 1 +) + +var ActionsRegistry = map[Action]bool{ + ActionAdd: true, + ActionRemove: true, + ActionOutput: true, +} + +type Action string + +type CaseRemove int + +type Typer interface { + GetType() string +} + +type Actioner interface { + GetAction() Action +} + +type Descriptioner interface { + GetDescription() string +} + +type InputConverter interface { + Typer + Actioner + Descriptioner + Input(Container) (Container, error) +} + +type OutputConverter interface 
{ + Typer + Actioner + Descriptioner + Output(Container) error +} diff --git a/list.go b/list.go new file mode 100644 index 000000000..cdba524c3 --- /dev/null +++ b/list.go @@ -0,0 +1,65 @@ +package main + +import ( + "fmt" + "log" + + "github.com/Loyalsoldier/domain-list-custom/lib" + "github.com/spf13/cobra" +) + +func init() { + rootCmd.AddCommand(listCmd) + listCmd.PersistentFlags().StringP("config", "c", "config.json", "URI of the JSON format config file") +} + +var listCmd = &cobra.Command{ + Use: "list", + Aliases: []string{"ls"}, + Short: "List available domain lists", + Run: func(cmd *cobra.Command, args []string) { + configFile, _ := cmd.Flags().GetString("config") + + instance, err := lib.NewInstance() + if err != nil { + log.Fatal(err) + } + + if err := instance.InitConfig(configFile); err != nil { + log.Fatal(err) + } + + // Process only input to get the list + for idx, inputConfig := range instance.Config.Input { + log.Printf("Processing input [%d/%d]: type=%s, action=%s", + idx+1, len(instance.Config.Input), inputConfig.Type, inputConfig.Action) + + converter, err := inputConfig.GetInputConverter() + if err != nil { + log.Fatal(err) + } + + newContainer, err := converter.Input(instance.Container) + if err != nil { + log.Fatal(err) + } + + if newContainer != nil { + instance.Container = newContainer + } + } + + // List all entries + fmt.Printf("\nAvailable domain lists (%d total):\n", instance.Container.Len()) + fmt.Println("---") + + names := instance.Container.GetNames() + for _, name := range names { + entry, found := instance.Container.GetEntry(name) + if !found { + continue + } + fmt.Printf(" - %s (%d domains)\n", name, len(entry.GetDomains())) + } + }, +} diff --git a/listinfo.go b/listinfo.go deleted file mode 100644 index 9a0f58a13..000000000 --- a/listinfo.go +++ /dev/null @@ -1,362 +0,0 @@ -package main - -import ( - "bufio" - "errors" - "fmt" - "os" - "sort" - "strings" - "time" - - router 
"github.com/v2fly/v2ray-core/v5/app/router/routercommon" -) - -// ListInfo is the information structure of a single file in data directory. -// It includes all types of rules of the file, as well as servel types of -// sturctures of same items for convenience in later process. -type ListInfo struct { - Name fileName - HasInclusion bool - InclusionAttributeMap map[fileName][]attribute - FullTypeList []*router.Domain - KeywordTypeList []*router.Domain - RegexpTypeList []*router.Domain - AttributeRuleUniqueList []*router.Domain - DomainTypeList []*router.Domain - DomainTypeUniqueList []*router.Domain - AttributeRuleListMap map[attribute][]*router.Domain - GeoSite *router.GeoSite -} - -// NewListInfo return a ListInfo -func NewListInfo() *ListInfo { - return &ListInfo{ - InclusionAttributeMap: make(map[fileName][]attribute), - FullTypeList: make([]*router.Domain, 0, 10), - KeywordTypeList: make([]*router.Domain, 0, 10), - RegexpTypeList: make([]*router.Domain, 0, 10), - AttributeRuleUniqueList: make([]*router.Domain, 0, 10), - DomainTypeList: make([]*router.Domain, 0, 10), - DomainTypeUniqueList: make([]*router.Domain, 0, 10), - AttributeRuleListMap: make(map[attribute][]*router.Domain), - } -} - -// ProcessList processes each line of every single file in the data directory -// and generates a ListInfo of each file. 
-func (l *ListInfo) ProcessList(file *os.File) error { - scanner := bufio.NewScanner(file) - // Parse a file line by line to generate ListInfo - for scanner.Scan() { - line := scanner.Text() - if isEmpty(line) { - continue - } - line = removeComment(line) - if isEmpty(line) { - continue - } - parsedRule, err := l.parseRule(line) - if err != nil { - return err - } - if parsedRule == nil { - continue - } - l.classifyRule(parsedRule) - } - if err := scanner.Err(); err != nil { - return err - } - - return nil -} - -// parseRule parses a single rule -func (l *ListInfo) parseRule(line string) (*router.Domain, error) { - line = strings.TrimSpace(line) - - if line == "" { - return nil, errors.New("empty line") - } - - // Parse `include` rule first, eg: `include:google`, `include:google @cn @gfw` - if strings.HasPrefix(line, "include:") { - l.parseInclusion(line) - return nil, nil - } - - parts := strings.Split(line, " ") - ruleWithType := strings.TrimSpace(parts[0]) - if ruleWithType == "" { - return nil, errors.New("empty rule") - } - - var rule router.Domain - if err := l.parseTypeRule(ruleWithType, &rule); err != nil { - return nil, err - } - - for _, attrString := range parts[1:] { - if attrString = strings.TrimSpace(attrString); attrString != "" { - attr, err := l.parseAttribute(attrString) - if err != nil { - return nil, err - } - rule.Attribute = append(rule.Attribute, attr) - } - } - - return &rule, nil -} - -func (l *ListInfo) parseInclusion(inclusion string) { - inclusionVal := strings.TrimPrefix(strings.TrimSpace(inclusion), "include:") - l.HasInclusion = true - inclusionValSlice := strings.Split(inclusionVal, "@") - filename := fileName(strings.ToUpper(strings.TrimSpace(inclusionValSlice[0]))) - switch len(inclusionValSlice) { - case 1: // Inclusion without attribute - // Use '@' as the placeholder attribute for 'include:filename' - l.InclusionAttributeMap[filename] = append(l.InclusionAttributeMap[filename], attribute("@")) - default: // Inclusion with 
attribute(s) - // support new inclusion syntax, eg: `include:google @cn @gfw` - for _, attr := range inclusionValSlice[1:] { - attr = strings.ToLower(strings.TrimSpace(attr)) - if attr != "" { - // Added in this format: '@cn' - l.InclusionAttributeMap[filename] = append(l.InclusionAttributeMap[filename], attribute("@"+attr)) - } - } - } -} - -func (l *ListInfo) parseTypeRule(domain string, rule *router.Domain) error { - kv := strings.Split(domain, ":") - switch len(kv) { - case 1: // line without type prefix - rule.Type = router.Domain_RootDomain - rule.Value = strings.ToLower(strings.TrimSpace(kv[0])) - case 2: // line with type prefix - ruleType := strings.TrimSpace(kv[0]) - ruleVal := strings.TrimSpace(kv[1]) - rule.Value = strings.ToLower(ruleVal) - switch strings.ToLower(ruleType) { - case "full": - rule.Type = router.Domain_Full - case "domain": - rule.Type = router.Domain_RootDomain - case "keyword": - rule.Type = router.Domain_Plain - case "regexp": - rule.Type = router.Domain_Regex - rule.Value = ruleVal - default: - return errors.New("unknown domain type: " + ruleType) - } - } - return nil -} - -func (l *ListInfo) parseAttribute(attr string) (*router.Domain_Attribute, error) { - if attr[0] != '@' { - return nil, errors.New("invalid attribute: " + attr) - } - attr = attr[1:] // Trim out attribute prefix `@` character - - var attribute router.Domain_Attribute - attribute.Key = strings.ToLower(attr) - attribute.TypedValue = &router.Domain_Attribute_BoolValue{BoolValue: true} - return &attribute, nil -} - -// classifyRule classifies a single rule and write into *ListInfo -func (l *ListInfo) classifyRule(rule *router.Domain) { - if len(rule.Attribute) > 0 { - l.AttributeRuleUniqueList = append(l.AttributeRuleUniqueList, rule) - var attrsString attribute - for _, attr := range rule.Attribute { - attrsString += attribute("@" + attr.GetKey()) // attrsString will be "@cn@ads" if there are more than one attributes - } - l.AttributeRuleListMap[attrsString] = 
append(l.AttributeRuleListMap[attrsString], rule) - } else { - switch rule.Type { - case router.Domain_Full: - l.FullTypeList = append(l.FullTypeList, rule) - case router.Domain_RootDomain: - l.DomainTypeList = append(l.DomainTypeList, rule) - case router.Domain_Plain: - l.KeywordTypeList = append(l.KeywordTypeList, rule) - case router.Domain_Regex: - l.RegexpTypeList = append(l.RegexpTypeList, rule) - } - } -} - -// Flatten flattens the rules in a file that have "include" syntax -// in data directory, and adds those need-to-included rules into it. -// This feature supports the "include:filename@attribute" syntax. -// It also generates a domain trie of domain-typed rules for each file -// to remove duplications of them. -func (l *ListInfo) Flatten(lm *ListInfoMap) error { - if l.HasInclusion { - for filename, attrs := range l.InclusionAttributeMap { - for _, attrWanted := range attrs { - includedList := (*lm)[filename] - switch string(attrWanted) { - case "@": - l.FullTypeList = append(l.FullTypeList, includedList.FullTypeList...) - l.DomainTypeList = append(l.DomainTypeList, includedList.DomainTypeList...) - l.KeywordTypeList = append(l.KeywordTypeList, includedList.KeywordTypeList...) - l.RegexpTypeList = append(l.RegexpTypeList, includedList.RegexpTypeList...) - l.AttributeRuleUniqueList = append(l.AttributeRuleUniqueList, includedList.AttributeRuleUniqueList...) - for attr, domainList := range includedList.AttributeRuleListMap { - l.AttributeRuleListMap[attr] = append(l.AttributeRuleListMap[attr], domainList...) - } - - default: - for attr, domainList := range includedList.AttributeRuleListMap { - // If there are more than one attribute attached to the rule, - // the attribute key of AttributeRuleListMap in ListInfo - // will be like: "@cn@ads". - // So if to extract rules with a specific attribute, it is necessary - // also to test the multi-attribute keys of AttributeRuleListMap. 
- // Notice: if "include:google @cn" and "include:google @ads" appear - // at the same time in the parent list. There are chances that the same - // rule with that two attributes(`@cn` and `@ads`) will be included twice in the parent list. - if strings.Contains(string(attr)+"@", string(attrWanted)+"@") { - l.AttributeRuleListMap[attr] = append(l.AttributeRuleListMap[attr], domainList...) - l.AttributeRuleUniqueList = append(l.AttributeRuleUniqueList, domainList...) - } - } - } - } - } - } - - sort.Slice(l.DomainTypeList, func(i, j int) bool { - return len(strings.Split(l.DomainTypeList[i].GetValue(), ".")) < len(strings.Split(l.DomainTypeList[j].GetValue(), ".")) - }) - - trie := NewDomainTrie() - for _, domain := range l.DomainTypeList { - success, err := trie.Insert(domain.GetValue()) - if err != nil { - return err - } - if success { - l.DomainTypeUniqueList = append(l.DomainTypeUniqueList, domain) - } - } - - return nil -} - -// ToGeoSite converts every ListInfo into a router.GeoSite structure. -// It also excludes rules with certain attributes in certain files that -// user specified in command line when runing the program. -func (l *ListInfo) ToGeoSite(excludeAttrs map[fileName]map[attribute]bool) { - geosite := new(router.GeoSite) - geosite.CountryCode = string(l.Name) - geosite.Domain = append(geosite.Domain, l.FullTypeList...) - geosite.Domain = append(geosite.Domain, l.DomainTypeUniqueList...) - geosite.Domain = append(geosite.Domain, l.RegexpTypeList...) 
- - for _, keywordRule := range l.KeywordTypeList { - if len(strings.TrimSpace(keywordRule.GetValue())) > 0 { - geosite.Domain = append(geosite.Domain, keywordRule) - } - } - - if excludeAttrs != nil && excludeAttrs[l.Name] != nil { - excludeAttrsMap := excludeAttrs[l.Name] - for _, domain := range l.AttributeRuleUniqueList { - ifKeep := true - for _, attr := range domain.GetAttribute() { - if excludeAttrsMap[attribute(attr.GetKey())] { - ifKeep = false - break - } - } - if ifKeep { - geosite.Domain = append(geosite.Domain, domain) - } - } - } else { - geosite.Domain = append(geosite.Domain, l.AttributeRuleUniqueList...) - } - l.GeoSite = geosite -} - -// ToPlainText convert router.GeoSite structure to plaintext format. -func (l *ListInfo) ToPlainText() []byte { - plaintextBytes := make([]byte, 0, 1024*512) - - for _, rule := range l.GeoSite.Domain { - ruleVal := strings.TrimSpace(rule.GetValue()) - if len(ruleVal) == 0 { - continue - } - - var ruleString string - switch rule.Type { - case router.Domain_Full: - ruleString = "full:" + ruleVal - case router.Domain_RootDomain: - ruleString = "domain:" + ruleVal - case router.Domain_Plain: - ruleString = "keyword:" + ruleVal - case router.Domain_Regex: - ruleString = "regexp:" + ruleVal - } - - if len(rule.Attribute) > 0 { - ruleString += ":" - for _, attr := range rule.Attribute { - ruleString += "@" + attr.GetKey() + "," - } - ruleString = strings.TrimRight(ruleString, ",") - } - // Output format is: type:domain.tld:@attr1,@attr2 - plaintextBytes = append(plaintextBytes, []byte(ruleString+"\n")...) - } - - return plaintextBytes -} - -// ToGFWList converts router.GeoSite to GFWList format. -func (l *ListInfo) ToGFWList() []byte { - loc, _ := time.LoadLocation("Asia/Shanghai") - timeString := fmt.Sprintf("! Last Modified: %s\n", time.Now().In(loc).Format(time.RFC1123)) - - gfwlistBytes := make([]byte, 0, 1024*512) - gfwlistBytes = append(gfwlistBytes, []byte("[AutoProxy 0.2.9]\n")...) 
- gfwlistBytes = append(gfwlistBytes, []byte(timeString)...) - gfwlistBytes = append(gfwlistBytes, []byte("! Expires: 24h\n")...) - gfwlistBytes = append(gfwlistBytes, []byte("! HomePage: https://github.com/Loyalsoldier/domain-list-custom\n")...) - gfwlistBytes = append(gfwlistBytes, []byte("! GitHub URL: https://raw.githubusercontent.com/Loyalsoldier/domain-list-custom/release/gfwlist.txt\n")...) - gfwlistBytes = append(gfwlistBytes, []byte("! jsdelivr URL: https://cdn.jsdelivr.net/gh/Loyalsoldier/domain-list-custom@release/gfwlist.txt\n")...) - gfwlistBytes = append(gfwlistBytes, []byte("\n")...) - - for _, rule := range l.GeoSite.Domain { - ruleVal := strings.TrimSpace(rule.GetValue()) - if len(ruleVal) == 0 { - continue - } - - switch rule.Type { - case router.Domain_Full: - gfwlistBytes = append(gfwlistBytes, []byte("|http://"+ruleVal+"\n")...) - gfwlistBytes = append(gfwlistBytes, []byte("|https://"+ruleVal+"\n")...) - case router.Domain_RootDomain: - gfwlistBytes = append(gfwlistBytes, []byte("||"+ruleVal+"\n")...) - case router.Domain_Plain: - gfwlistBytes = append(gfwlistBytes, []byte(ruleVal+"\n")...) - case router.Domain_Regex: - gfwlistBytes = append(gfwlistBytes, []byte("/"+ruleVal+"/\n")...) - } - } - - return gfwlistBytes -} diff --git a/listinfomap.go b/listinfomap.go deleted file mode 100644 index 4149c4e40..000000000 --- a/listinfomap.go +++ /dev/null @@ -1,132 +0,0 @@ -package main - -import ( - "errors" - "fmt" - "os" - "path/filepath" - "strings" - - router "github.com/v2fly/v2ray-core/v5/app/router/routercommon" -) - -// ListInfoMap is the map of files in data directory and ListInfo -type ListInfoMap map[fileName]*ListInfo - -// Marshal processes a file in data directory and generates ListInfo for it. 
-func (lm *ListInfoMap) Marshal(path string) error { - file, err := os.Open(path) - if err != nil { - return err - } - defer file.Close() - - list := NewListInfo() - listName := fileName(strings.ToUpper(filepath.Base(path))) - list.Name = listName - if err := list.ProcessList(file); err != nil { - return err - } - - (*lm)[listName] = list - return nil -} - -// FlattenAndGenUniqueDomainList flattens the included lists and -// generates a domain trie for each file in data directory to -// make the items of domain type list unique. -func (lm *ListInfoMap) FlattenAndGenUniqueDomainList() error { - inclusionLevel := make([]map[fileName]bool, 0, 20) - okayList := make(map[fileName]bool) - inclusionLevelAllLength, loopTimes := 0, 0 - - for inclusionLevelAllLength < len(*lm) { - inclusionMap := make(map[fileName]bool) - - if loopTimes == 0 { - for _, listinfo := range *lm { - if listinfo.HasInclusion { - continue - } - inclusionMap[listinfo.Name] = true - } - } else { - for _, listinfo := range *lm { - if !listinfo.HasInclusion || okayList[listinfo.Name] { - continue - } - - var passTimes int - for filename := range listinfo.InclusionAttributeMap { - if !okayList[filename] { - break - } - passTimes++ - } - if passTimes == len(listinfo.InclusionAttributeMap) { - inclusionMap[listinfo.Name] = true - } - } - } - - for filename := range inclusionMap { - okayList[filename] = true - } - - inclusionLevel = append(inclusionLevel, inclusionMap) - inclusionLevelAllLength += len(inclusionMap) - loopTimes++ - } - - for idx, inclusionMap := range inclusionLevel { - fmt.Printf("Level %d:\n", idx+1) - fmt.Println(inclusionMap) - fmt.Println() - - for inclusionFilename := range inclusionMap { - if err := (*lm)[inclusionFilename].Flatten(lm); err != nil { - return err - } - } - } - - return nil -} - -// ToProto generates a router.GeoSite for each file in data directory -// and returns a router.GeoSiteList -func (lm *ListInfoMap) ToProto(excludeAttrs map[fileName]map[attribute]bool) 
*router.GeoSiteList { - protoList := new(router.GeoSiteList) - for _, listinfo := range *lm { - listinfo.ToGeoSite(excludeAttrs) - protoList.Entry = append(protoList.Entry, listinfo.GeoSite) - } - return protoList -} - -// ToPlainText returns a map of exported lists that user wants -// and the contents of them in byte format. -func (lm *ListInfoMap) ToPlainText(exportListsMap []string) (map[string][]byte, error) { - filePlainTextBytesMap := make(map[string][]byte) - for _, filename := range exportListsMap { - if listinfo := (*lm)[fileName(strings.ToUpper(filename))]; listinfo != nil { - plaintextBytes := listinfo.ToPlainText() - filePlainTextBytesMap[filename] = plaintextBytes - } else { - fmt.Println("Notice: " + filename + ": no such exported list in the directory, skipped.") - } - } - return filePlainTextBytesMap, nil -} - -// ToGFWList returns the content of the list to be generated into GFWList format -// that user wants in bytes format. -func (lm *ListInfoMap) ToGFWList(togfwlist string) ([]byte, error) { - if togfwlist != "" { - if listinfo := (*lm)[fileName(strings.ToUpper(togfwlist))]; listinfo != nil { - return listinfo.ToGFWList(), nil - } - return nil, errors.New("no such list: " + togfwlist) - } - return nil, nil -} diff --git a/main.go b/main.go index ef4bf5c18..c15897101 100644 --- a/main.go +++ b/main.go @@ -1,133 +1,21 @@ package main import ( - "encoding/base64" - "flag" - "fmt" - "os" - "path/filepath" - "strings" + "log" - "google.golang.org/protobuf/proto" + "github.com/spf13/cobra" ) -var ( - dataPath = flag.String("datapath", filepath.Join("./", "data"), "Path to your custom 'data' directory") - datName = flag.String("datname", "geosite.dat", "Name of the generated dat file") - outputPath = flag.String("outputpath", "./publish", "Output path to the generated files") - exportLists = flag.String("exportlists", "category-ads-all,tld-cn,cn,geolocation-cn,tld-!cn,geolocation-!cn,private,apple,icloud,google,steam", "Lists to be exported in 
plaintext format, separated by ',' comma") - excludeAttrs = flag.String("excludeattrs", "cn@!cn@ads,geolocation-cn@!cn@ads,geolocation-!cn@cn@ads", "Exclude rules with certain attributes in certain lists, seperated by ',' comma, support multiple attributes in one list. Example: geolocation-!cn@cn@ads,geolocation-cn@!cn") - toGFWList = flag.String("togfwlist", "geolocation-!cn", "List to be exported in GFWList format") -) +var rootCmd = &cobra.Command{ + Use: "domain-list-custom", + Short: "domain-list-custom is a tool to convert and manage domain lists in various formats", + CompletionOptions: cobra.CompletionOptions{ + HiddenDefaultCmd: true, + }, +} func main() { - flag.Parse() - - dir := GetDataDir() - listInfoMap := make(ListInfoMap) - - if err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error { - if err != nil { - return err - } - if info.IsDir() { - return nil - } - if err := listInfoMap.Marshal(path); err != nil { - return err - } - return nil - }); err != nil { - fmt.Println("Failed:", err) - os.Exit(1) - } - - if err := listInfoMap.FlattenAndGenUniqueDomainList(); err != nil { - fmt.Println("Failed:", err) - os.Exit(1) - } - - // Process and split *excludeRules - excludeAttrsInFile := make(map[fileName]map[attribute]bool) - if *excludeAttrs != "" { - exFilenameAttrSlice := strings.Split(*excludeAttrs, ",") - for _, exFilenameAttr := range exFilenameAttrSlice { - exFilenameAttr = strings.TrimSpace(exFilenameAttr) - exFilenameAttrMap := strings.Split(exFilenameAttr, "@") - filename := fileName(strings.ToUpper(strings.TrimSpace(exFilenameAttrMap[0]))) - excludeAttrsInFile[filename] = make(map[attribute]bool) - for _, attr := range exFilenameAttrMap[1:] { - attr = strings.TrimSpace(attr) - if len(attr) > 0 { - excludeAttrsInFile[filename][attribute(attr)] = true - } - } - } - } - - // Process and split *exportLists - var exportListsSlice []string - if *exportLists != "" { - tempSlice := strings.Split(*exportLists, ",") - for _, 
exportList := range tempSlice { - exportList = strings.TrimSpace(exportList) - if len(exportList) > 0 { - exportListsSlice = append(exportListsSlice, exportList) - } - } - } - - // Generate dlc.dat - if geositeList := listInfoMap.ToProto(excludeAttrsInFile); geositeList != nil { - protoBytes, err := proto.Marshal(geositeList) - if err != nil { - fmt.Println("Failed:", err) - os.Exit(1) - } - if err := os.MkdirAll(*outputPath, 0755); err != nil { - fmt.Println("Failed:", err) - os.Exit(1) - } - if err := os.WriteFile(filepath.Join(*outputPath, *datName), protoBytes, 0644); err != nil { - fmt.Println("Failed:", err) - os.Exit(1) - } else { - fmt.Printf("%s has been generated successfully in '%s'.\n", *datName, *outputPath) - } - } - - // Generate plaintext list files - if filePlainTextBytesMap, err := listInfoMap.ToPlainText(exportListsSlice); err == nil { - for filename, plaintextBytes := range filePlainTextBytesMap { - filename += ".txt" - if err := os.WriteFile(filepath.Join(*outputPath, filename), plaintextBytes, 0644); err != nil { - fmt.Println("Failed:", err) - os.Exit(1) - } else { - fmt.Printf("%s has been generated successfully in '%s'.\n", filename, *outputPath) - } - } - } else { - fmt.Println("Failed:", err) - os.Exit(1) - } - - // Generate gfwlist.txt - if gfwlistBytes, err := listInfoMap.ToGFWList(*toGFWList); err == nil { - if f, err := os.OpenFile(filepath.Join(*outputPath, "gfwlist.txt"), os.O_RDWR|os.O_CREATE, 0644); err != nil { - fmt.Println("Failed:", err) - os.Exit(1) - } else { - encoder := base64.NewEncoder(base64.StdEncoding, f) - defer encoder.Close() - if _, err := encoder.Write(gfwlistBytes); err != nil { - fmt.Println("Failed:", err) - os.Exit(1) - } - fmt.Printf("gfwlist.txt has been generated successfully in '%s'.\n", *outputPath) - } - } else { - fmt.Println("Failed:", err) - os.Exit(1) + if err := rootCmd.Execute(); err != nil { + log.Fatal(err) } } diff --git a/plugin/plaintext/domainlist_in.go b/plugin/plaintext/domainlist_in.go 
new file mode 100644 index 000000000..49b0e53a8 --- /dev/null +++ b/plugin/plaintext/domainlist_in.go @@ -0,0 +1,356 @@ +package plaintext + +import ( + "bufio" + "encoding/json" + "errors" + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/Loyalsoldier/domain-list-custom/lib" + router "github.com/v2fly/v2ray-core/v5/app/router/routercommon" +) + +const ( + TypeDomainListIn = "domainlist" + DescDomainListIn = "Convert domain list to other formats" +) + +func init() { + lib.RegisterInputConfigCreator(TypeDomainListIn, func(action lib.Action, data json.RawMessage) (lib.InputConverter, error) { + return newDomainListIn(action, data) + }) + lib.RegisterInputConverter(TypeDomainListIn, &DomainListIn{ + Description: DescDomainListIn, + }) +} + +type DomainListIn struct { + Type string + Action lib.Action + Description string + DataDir string + Want map[string]bool +} + +type fileInfo struct { + Name string + HasInclusion bool + InclusionAttributeMap map[string][]string + Domains []*router.Domain +} + +func newDomainListIn(action lib.Action, data json.RawMessage) (lib.InputConverter, error) { + var tmp struct { + DataDir string `json:"dataDir"` + Want []string `json:"wantedList"` + } + + if len(data) > 0 { + if err := json.Unmarshal(data, &tmp); err != nil { + return nil, err + } + } + + if tmp.DataDir == "" { + return nil, fmt.Errorf("dataDir is required") + } + + // Filter wanted list + wantList := make(map[string]bool) + for _, want := range tmp.Want { + if want = strings.ToUpper(strings.TrimSpace(want)); want != "" { + wantList[want] = true + } + } + + return &DomainListIn{ + Type: TypeDomainListIn, + Action: action, + Description: DescDomainListIn, + DataDir: tmp.DataDir, + Want: wantList, + }, nil +} + +func (d *DomainListIn) GetType() string { + return d.Type +} + +func (d *DomainListIn) GetAction() lib.Action { + return d.Action +} + +func (d *DomainListIn) GetDescription() string { + return d.Description +} + +func (d *DomainListIn) Input(container 
lib.Container) (lib.Container, error) { + // Read all files from data directory + fileInfoMap := make(map[string]*fileInfo) + + err := filepath.Walk(d.DataDir, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + if info.IsDir() { + return nil + } + + filename := strings.ToUpper(filepath.Base(path)) + + // Filter by wanted list if specified + if len(d.Want) > 0 && !d.Want[filename] { + return nil + } + + fileData, err := d.processFile(path, filename) + if err != nil { + return fmt.Errorf("failed to process file %s: %w", path, err) + } + + fileInfoMap[filename] = fileData + return nil + }) + + if err != nil { + return nil, err + } + + // Process inclusions + if err := d.processInclusions(fileInfoMap); err != nil { + return nil, err + } + + // Add entries to container + for filename, fileData := range fileInfoMap { + entry := lib.NewEntry(filename) + entry.AddDomains(fileData.Domains) + + if err := container.Add(entry); err != nil { + return nil, err + } + } + + return container, nil +} + +func (d *DomainListIn) processFile(path string, filename string) (*fileInfo, error) { + file, err := os.Open(path) + if err != nil { + return nil, err + } + defer file.Close() + + info := &fileInfo{ + Name: filename, + InclusionAttributeMap: make(map[string][]string), + Domains: make([]*router.Domain, 0), + } + + scanner := bufio.NewScanner(file) + for scanner.Scan() { + line := scanner.Text() + + if lib.IsEmpty(line) { + continue + } + + line = lib.RemoveComment(line) + if lib.IsEmpty(line) { + continue + } + + // Parse rule + domain, isInclusion, err := d.parseRule(line, info) + if err != nil { + return nil, fmt.Errorf("failed to parse rule '%s': %w", line, err) + } + + if !isInclusion && domain != nil { + info.Domains = append(info.Domains, domain) + } + } + + if err := scanner.Err(); err != nil { + return nil, err + } + + return info, nil +} + +func (d *DomainListIn) parseRule(line string, info *fileInfo) (*router.Domain, bool, error) { + 
line = strings.TrimSpace(line) + + if line == "" { + return nil, false, errors.New("empty line") + } + + // Parse include rule + if strings.HasPrefix(line, "include:") { + d.parseInclusion(line, info) + return nil, true, nil + } + + parts := strings.Split(line, " ") + ruleWithType := strings.TrimSpace(parts[0]) + if ruleWithType == "" { + return nil, false, errors.New("empty rule") + } + + var domain router.Domain + if err := d.parseTypeRule(ruleWithType, &domain); err != nil { + return nil, false, err + } + + // Parse attributes + for _, attrString := range parts[1:] { + if attrString = strings.TrimSpace(attrString); attrString != "" { + attr, err := d.parseAttribute(attrString) + if err != nil { + return nil, false, err + } + domain.Attribute = append(domain.Attribute, attr) + } + } + + return &domain, false, nil +} + +func (d *DomainListIn) parseInclusion(inclusion string, info *fileInfo) { + inclusionVal := strings.TrimPrefix(strings.TrimSpace(inclusion), "include:") + info.HasInclusion = true + + inclusionValSlice := strings.Split(inclusionVal, "@") + filename := strings.ToUpper(strings.TrimSpace(inclusionValSlice[0])) + + switch len(inclusionValSlice) { + case 1: + // Inclusion without attribute + info.InclusionAttributeMap[filename] = append(info.InclusionAttributeMap[filename], "@") + default: + // Inclusion with attributes + for _, attr := range inclusionValSlice[1:] { + attr = strings.ToLower(strings.TrimSpace(attr)) + if attr != "" { + info.InclusionAttributeMap[filename] = append(info.InclusionAttributeMap[filename], "@"+attr) + } + } + } +} + +func (d *DomainListIn) parseTypeRule(domain string, rule *router.Domain) error { + kv := strings.Split(domain, ":") + switch len(kv) { + case 1: + // Line without type prefix + rule.Type = router.Domain_RootDomain + rule.Value = strings.ToLower(strings.TrimSpace(kv[0])) + case 2: + // Line with type prefix + ruleType := strings.TrimSpace(kv[0]) + ruleVal := strings.TrimSpace(kv[1]) + rule.Value = 
strings.ToLower(ruleVal) + + switch strings.ToLower(ruleType) { + case "full": + rule.Type = router.Domain_Full + case "domain": + rule.Type = router.Domain_RootDomain + case "keyword": + rule.Type = router.Domain_Plain + case "regexp": + rule.Type = router.Domain_Regex + rule.Value = ruleVal // Keep original case for regex + default: + return fmt.Errorf("unknown domain type: %s", ruleType) + } + } + return nil +} + +func (d *DomainListIn) parseAttribute(attr string) (*router.Domain_Attribute, error) { + if attr[0] != '@' { + return nil, fmt.Errorf("invalid attribute: %s", attr) + } + attr = attr[1:] // Trim attribute prefix '@' + + var attribute router.Domain_Attribute + attribute.Key = strings.ToLower(attr) + attribute.TypedValue = &router.Domain_Attribute_BoolValue{BoolValue: true} + + return &attribute, nil +} + +func (d *DomainListIn) processInclusions(fileInfoMap map[string]*fileInfo) error { + // Build dependency levels + processed := make(map[string]bool) + + for len(processed) < len(fileInfoMap) { + changed := false + + for filename, info := range fileInfoMap { + if processed[filename] { + continue + } + + // Check if all dependencies are processed + canProcess := true + if info.HasInclusion { + for depName := range info.InclusionAttributeMap { + if !processed[depName] { + canProcess = false + break + } + } + } + + if canProcess || !info.HasInclusion { + // Process inclusions + if info.HasInclusion { + for depName, attrs := range info.InclusionAttributeMap { + depInfo := fileInfoMap[depName] + if depInfo == nil { + return fmt.Errorf("included file %s not found", depName) + } + + for _, attrWanted := range attrs { + if attrWanted == "@" { + // Include all domains + info.Domains = append(info.Domains, depInfo.Domains...) 
+ } else { + // Include domains with specific attribute + for _, domain := range depInfo.Domains { + for _, attr := range domain.Attribute { + if "@"+attr.GetKey() == attrWanted { + info.Domains = append(info.Domains, domain) + break + } + } + } + } + } + } + } + + processed[filename] = true + changed = true + } + } + + if !changed { + // Circular dependency detected + var unprocessed []string + for filename := range fileInfoMap { + if !processed[filename] { + unprocessed = append(unprocessed, filename) + } + } + return fmt.Errorf("circular dependency detected in files: %v", unprocessed) + } + } + + return nil +} diff --git a/plugin/plaintext/text_out.go b/plugin/plaintext/text_out.go new file mode 100644 index 000000000..6db67f480 --- /dev/null +++ b/plugin/plaintext/text_out.go @@ -0,0 +1,141 @@ +package plaintext + +import ( + "encoding/json" + "fmt" + "log" + "os" + "path/filepath" + "slices" + "strings" + + "github.com/Loyalsoldier/domain-list-custom/lib" +) + +const ( + TypeTextOut = "text" + DescTextOut = "Convert domain lists to plaintext format" +) + +func init() { + lib.RegisterOutputConfigCreator(TypeTextOut, func(action lib.Action, data json.RawMessage) (lib.OutputConverter, error) { + return newTextOut(action, data) + }) + lib.RegisterOutputConverter(TypeTextOut, &TextOut{ + Description: DescTextOut, + }) +} + +type TextOut struct { + Type string + Action lib.Action + Description string + OutputDir string + OutputExt string + Want []string + Exclude []string +} + +func newTextOut(action lib.Action, data json.RawMessage) (lib.OutputConverter, error) { + var tmp struct { + OutputDir string `json:"outputDir"` + Want []string `json:"wantedList"` + Exclude []string `json:"excludedList"` + } + + if len(data) > 0 { + if err := json.Unmarshal(data, &tmp); err != nil { + return nil, err + } + } + + if tmp.OutputDir == "" { + tmp.OutputDir = "./output" + } + + return &TextOut{ + Type: TypeTextOut, + Action: action, + Description: DescTextOut, + OutputDir: 
tmp.OutputDir, + OutputExt: ".txt", + Want: tmp.Want, + Exclude: tmp.Exclude, + }, nil +} + +func (t *TextOut) GetType() string { + return t.Type +} + +func (t *TextOut) GetAction() lib.Action { + return t.Action +} + +func (t *TextOut) GetDescription() string { + return t.Description +} + +func (t *TextOut) Output(container lib.Container) error { + // Create output directory + if err := os.MkdirAll(t.OutputDir, 0755); err != nil { + return fmt.Errorf("failed to create output directory: %w", err) + } + + for _, name := range t.filterAndSortList(container) { + entry, found := container.GetEntry(name) + if !found { + log.Printf("❌ entry %s not found\n", name) + continue + } + + data, err := entry.MarshalText() + if err != nil { + return fmt.Errorf("failed to marshal entry %s: %w", name, err) + } + + filename := strings.ToLower(entry.GetName()) + t.OutputExt + filepath := filepath.Join(t.OutputDir, filename) + + if err := os.WriteFile(filepath, data, 0644); err != nil { + return fmt.Errorf("failed to write file %s: %w", filepath, err) + } + + log.Printf("✅ Generated %s\n", filename) + } + + return nil +} + +func (t *TextOut) filterAndSortList(container lib.Container) []string { + excludeMap := make(map[string]bool) + for _, exclude := range t.Exclude { + if exclude = strings.ToUpper(strings.TrimSpace(exclude)); exclude != "" { + excludeMap[exclude] = true + } + } + + wantList := make([]string, 0, len(t.Want)) + for _, want := range t.Want { + if want = strings.ToUpper(strings.TrimSpace(want)); want != "" && !excludeMap[want] { + wantList = append(wantList, want) + } + } + + if len(wantList) > 0 { + slices.Sort(wantList) + return wantList + } + + list := make([]string, 0, 300) + for entry := range container.Loop() { + name := entry.GetName() + if excludeMap[name] { + continue + } + list = append(list, name) + } + + slices.Sort(list) + return list +} diff --git a/plugin/v2ray/geosite_out.go b/plugin/v2ray/geosite_out.go new file mode 100644 index 000000000..362aaa736 
--- /dev/null
+++ b/plugin/v2ray/geosite_out.go
@@ -0,0 +1,305 @@
+package v2ray
+
+import (
+	"encoding/base64"
+	"encoding/json"
+	"fmt"
+	"log"
+	"os"
+	"path/filepath"
+	"slices"
+	"strings"
+	"time"
+
+	"github.com/Loyalsoldier/domain-list-custom/lib"
+	router "github.com/v2fly/v2ray-core/v5/app/router/routercommon"
+	"google.golang.org/protobuf/proto"
+)
+
+const (
+	// TypeGeositeOut is the name this output converter is registered under.
+	TypeGeositeOut = "v2rayGeoSite"
+	// DescGeositeOut is the human-readable description of the converter.
+	DescGeositeOut = "Convert domain lists to V2Ray geosite format"
+)
+
+func init() {
+	lib.RegisterOutputConfigCreator(TypeGeositeOut, func(action lib.Action, data json.RawMessage) (lib.OutputConverter, error) {
+		return newGeositeOut(action, data)
+	})
+	lib.RegisterOutputConverter(TypeGeositeOut, &GeositeOut{
+		Description: DescGeositeOut,
+	})
+}
+
+// GeositeOut writes the selected lists into a single protobuf-encoded
+// geosite file and can additionally render one list as a base64 gfwlist.txt.
+type GeositeOut struct {
+	Type          string
+	Action        lib.Action
+	Description   string
+	OutputDir     string                     // directory the files are written to
+	OutputName    string                     // file name of the geosite file
+	Want          []string                   // lists to emit; when empty every list is emitted
+	Exclude       []string                   // lists that are never emitted
+	ExcludeAttrs  map[string]map[string]bool // upper-cased list name -> attribute keys to drop
+	GFWListOutput string                     // list rendered as gfwlist.txt; empty disables it
+}
+
+// newGeositeOut builds a GeositeOut from the JSON arguments of an output
+// config entry. OutputDir defaults to "./output" and OutputName to
+// "geosite.dat".
+func newGeositeOut(action lib.Action, data json.RawMessage) (lib.OutputConverter, error) {
+	var tmp struct {
+		OutputDir     string   `json:"outputDir"`
+		OutputName    string   `json:"outputName"`
+		Want          []string `json:"wantedList"`
+		Exclude       []string `json:"excludedList"`
+		ExcludeAttrs  string   `json:"excludeAttrs"`
+		GFWListOutput string   `json:"gfwlistOutput"`
+	}
+
+	if len(data) > 0 {
+		if err := json.Unmarshal(data, &tmp); err != nil {
+			return nil, err
+		}
+	}
+
+	if tmp.OutputDir == "" {
+		tmp.OutputDir = "./output"
+	}
+
+	if tmp.OutputName == "" {
+		tmp.OutputName = "geosite.dat"
+	}
+
+	return &GeositeOut{
+		Type:          TypeGeositeOut,
+		Action:        action,
+		Description:   DescGeositeOut,
+		OutputDir:     tmp.OutputDir,
+		OutputName:    tmp.OutputName,
+		Want:          tmp.Want,
+		Exclude:       tmp.Exclude,
+		ExcludeAttrs:  parseExcludeAttrs(tmp.ExcludeAttrs),
+		GFWListOutput: tmp.GFWListOutput,
+	}, nil
+}
+
+// parseExcludeAttrs parses a spec such as "cn@!cn@ads,geolocation-cn@ads"
+// into a map from upper-cased list name to the attribute keys to drop from
+// that list. Items are comma separated; inside an item the list name and its
+// attributes are separated by "@". Items without a list name or without any
+// attribute are ignored, and a list named in several items accumulates the
+// attributes of all of them instead of keeping only the last item's.
+func parseExcludeAttrs(spec string) map[string]map[string]bool {
+	result := make(map[string]map[string]bool)
+	for _, item := range strings.Split(spec, ",") {
+		parts := strings.Split(strings.TrimSpace(item), "@")
+		name := strings.ToUpper(strings.TrimSpace(parts[0]))
+		if name == "" {
+			continue
+		}
+		for _, attr := range parts[1:] {
+			attr = strings.TrimSpace(attr)
+			if attr == "" {
+				continue
+			}
+			if result[name] == nil {
+				result[name] = make(map[string]bool)
+			}
+			result[name][attr] = true
+		}
+	}
+	return result
+}
+
+// GetType returns the converter type name.
+func (g *GeositeOut) GetType() string {
+	return g.Type
+}
+
+// GetAction returns the action this converter was configured with.
+func (g *GeositeOut) GetAction() lib.Action {
+	return g.Action
+}
+
+// GetDescription returns the human-readable description of the converter.
+func (g *GeositeOut) GetDescription() string {
+	return g.Description
+}
+
+// Output marshals the selected lists into OutputDir/OutputName, creating the
+// directory when needed, and then writes gfwlist.txt when GFWListOutput is
+// set.
+func (g *GeositeOut) Output(container lib.Container) error {
+	if err := os.MkdirAll(g.OutputDir, 0755); err != nil {
+		return fmt.Errorf("failed to create output directory: %w", err)
+	}
+
+	protoBytes, err := proto.Marshal(g.toGeoSiteList(container))
+	if err != nil {
+		return fmt.Errorf("failed to marshal geosite list: %w", err)
+	}
+
+	// Named datPath so the path/filepath package is not shadowed.
+	datPath := filepath.Join(g.OutputDir, g.OutputName)
+	if err := os.WriteFile(datPath, protoBytes, 0644); err != nil {
+		return fmt.Errorf("failed to write file %s: %w", datPath, err)
+	}
+
+	log.Printf("✅ Generated %s\n", g.OutputName)
+
+	if g.GFWListOutput != "" {
+		if err := g.generateGFWList(container); err != nil {
+			return fmt.Errorf("failed to generate GFWList: %w", err)
+		}
+	}
+
+	return nil
+}
+
+// toGeoSiteList converts every selected list into a GeoSite, in sorted name
+// order. A selected list that is missing from the container is logged and
+// skipped.
+func (g *GeositeOut) toGeoSiteList(container lib.Container) *router.GeoSiteList {
+	geositeList := new(router.GeoSiteList)
+
+	for _, name := range g.filterAndSortList(container) {
+		entry, found := container.GetEntry(name)
+		if !found {
+			log.Printf("❌ entry %s not found\n", name)
+			continue
+		}
+
+		geositeList.Entry = append(geositeList.Entry, g.toGeoSite(entry))
+	}
+
+	return geositeList
+}
+
+// toGeoSite converts one list into a GeoSite whose country code is the
+// lower-cased list name. Domains carrying any attribute that ExcludeAttrs
+// holds for this list are left out.
+func (g *GeositeOut) toGeoSite(entry *lib.Entry) *router.GeoSite {
+	geosite := new(router.GeoSite)
+	geosite.CountryCode = strings.ToLower(entry.GetName())
+
+	excluded := g.ExcludeAttrs[entry.GetName()]
+
+	for _, domain := range entry.GetDomains() {
+		shouldExclude := false
+		for _, attr := range domain.GetAttribute() {
+			// Reading a missing key, or a nil map, yields false.
+			if excluded[attr.GetKey()] {
+				shouldExclude = true
+				break
+			}
+		}
+		if shouldExclude {
+			continue
+		}
+
+		geosite.Domain = append(geosite.Domain, domain)
+	}
+
+	return geosite
+}
+
+// generateGFWList renders the list named by GFWListOutput as AutoProxy rules
+// and writes them, base64 encoded, to OutputDir/gfwlist.txt. It returns an
+// error when that list is not in the container or the file cannot be written.
+func (g *GeositeOut) generateGFWList(container lib.Container) error {
+	listName := strings.ToUpper(strings.TrimSpace(g.GFWListOutput))
+	entry, found := container.GetEntry(listName)
+	if !found {
+		return fmt.Errorf("entry %s not found for GFWList generation", listName)
+	}
+
+	loc, err := time.LoadLocation("Asia/Shanghai")
+	if err != nil {
+		// LoadLocation fails where no time zone database is installed and
+		// Time.In panics on a nil location, so fall back to a fixed UTC+8
+		// zone with the same "CST" abbreviation.
+		loc = time.FixedZone("CST", 8*60*60)
+	}
+
+	rules := make([]byte, 0, 512*1024)
+	rules = append(rules, "[AutoProxy 0.2.9]\n"...)
+	rules = fmt.Appendf(rules, "! Last Modified: %s\n", time.Now().In(loc).Format(time.RFC1123))
+	rules = append(rules, "! Expires: 24h\n"...)
+	rules = append(rules, "! HomePage: https://github.com/Loyalsoldier/domain-list-custom\n"...)
+	rules = append(rules, "! GitHub URL: https://raw.githubusercontent.com/Loyalsoldier/domain-list-custom/release/gfwlist.txt\n"...)
+	rules = append(rules, "! jsdelivr URL: https://cdn.jsdelivr.net/gh/Loyalsoldier/domain-list-custom@release/gfwlist.txt\n"...)
+	rules = append(rules, '\n')
+
+	for _, domain := range entry.GetDomains() {
+		ruleVal := strings.TrimSpace(domain.GetValue())
+		if ruleVal == "" {
+			continue
+		}
+
+		switch domain.Type {
+		case router.Domain_Full:
+			rules = append(rules, "|http://"+ruleVal+"\n"...)
+			rules = append(rules, "|https://"+ruleVal+"\n"...)
+		case router.Domain_RootDomain:
+			rules = append(rules, "||"+ruleVal+"\n"...)
+		case router.Domain_Plain:
+			rules = append(rules, ruleVal+"\n"...)
+		case router.Domain_Regex:
+			rules = append(rules, "/"+ruleVal+"/\n"...)
+		}
+	}
+
+	// Encode in memory and write in one call so a failed or partial write is
+	// reported instead of being lost in a deferred Close.
+	encoded := make([]byte, base64.StdEncoding.EncodedLen(len(rules)))
+	base64.StdEncoding.Encode(encoded, rules)
+
+	gfwlistPath := filepath.Join(g.OutputDir, "gfwlist.txt")
+	if err := os.WriteFile(gfwlistPath, encoded, 0644); err != nil {
+		return fmt.Errorf("failed to write gfwlist: %w", err)
+	}
+
+	log.Printf("✅ Generated gfwlist.txt\n")
+	return nil
+}
+
+// filterAndSortList returns the sorted names of the lists to emit. Names in
+// Want and Exclude are trimmed and upper-cased before they are compared, and
+// duplicates in Want are collapsed. When Want selects nothing, every list in
+// the container that is not excluded is returned.
+func (g *GeositeOut) filterAndSortList(container lib.Container) []string {
+	excludeMap := make(map[string]bool, len(g.Exclude))
+	for _, exclude := range g.Exclude {
+		if exclude = strings.ToUpper(strings.TrimSpace(exclude)); exclude != "" {
+			excludeMap[exclude] = true
+		}
+	}
+
+	wantList := make([]string, 0, len(g.Want))
+	for _, want := range g.Want {
+		if want = strings.ToUpper(strings.TrimSpace(want)); want != "" && !excludeMap[want] {
+			wantList = append(wantList, want)
+		}
+	}
+
+	if len(wantList) > 0 {
+		slices.Sort(wantList)
+		// A name repeated in wantedList must not yield duplicate entries.
+		return slices.Compact(wantList)
+	}
+
+	list := make([]string, 0, 300)
+	for entry := range container.Loop() {
+		name := entry.GetName()
+		if excludeMap[name] {
+			continue
+		}
+		list = append(list, name)
+	}
+
+	slices.Sort(list)
+	return list
+}
diff --git
a/trie.go b/trie.go deleted file mode 100644 index a34748e11..000000000 --- a/trie.go +++ /dev/null @@ -1,73 +0,0 @@ -package main - -import ( - "errors" - "strings" -) - -type node struct { - leaf bool - children map[string]*node -} - -func newNode() *node { - return &node{ - leaf: false, - children: make(map[string]*node), - } -} - -func (n *node) getChild(s string) *node { - return n.children[s] -} - -func (n *node) hasChild(s string) bool { - return n.getChild(s) != nil -} - -func (n *node) addChild(s string, child *node) { - n.children[s] = child -} - -func (n *node) isLeaf() bool { - return n.leaf -} - -// DomainTrie is a domain trie for domain type rules. -type DomainTrie struct { - root *node -} - -// NewDomainTrie creates and returns a new domain trie. -func NewDomainTrie() *DomainTrie { - return &DomainTrie{ - root: newNode(), - } -} - -// Insert inserts a domain rule string into the domain trie -// and return whether is inserted successfully or not. -func (t *DomainTrie) Insert(domain string) (bool, error) { - if domain == "" { - return false, errors.New("empty domain") - } - parts := strings.Split(domain, ".") - - node := t.root - for i := len(parts) - 1; i >= 0; i-- { - part := parts[i] - - if node.isLeaf() { - return false, nil - } - if !node.hasChild(part) { - node.addChild(part, newNode()) - if i == 0 { - node.getChild(part).leaf = true - return true, nil - } - } - node = node.getChild(part) - } - return false, nil -}