Skip to content

Commit

Permalink
grab/venustech: add VenustechCrawler
Browse files Browse the repository at this point in the history
  • Loading branch information
TardC authored and zema1 committed Nov 19, 2024
1 parent fcb1d8b commit 9bbc19a
Show file tree
Hide file tree
Showing 2 changed files with 221 additions and 0 deletions.
190 changes: 190 additions & 0 deletions grab/venustech.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
package grab

import (
"bytes"
"context"
"fmt"
"path"
"strings"
"time"

"github.com/PuerkitoBio/goquery"
"github.com/imroc/req/v3"
"github.com/kataras/golog"

"github.com/zema1/watchvuln/util"
)

// VenustechCrawler scrapes vulnerability advisories from the Venustech
// security bulletin site (https://www.venustech.com.cn/new_type/aqtg/).
type VenustechCrawler struct {
// client is the HTTP client used for every index and advisory request.
client *req.Client
// log receives this crawler's progress, debug and error messages.
log *golog.Logger
}

// ProviderInfo describes this source: its short machine name, the display
// name shown to users, and the landing page of the advisory list.
func (v *VenustechCrawler) ProviderInfo() *Provider {
	info := new(Provider)
	info.Name = "venustech"
	info.DisplayName = "启明星辰漏洞通告"
	info.Link = "https://www.venustech.com.cn/new_type/aqtg/"
	return info
}

// IsValuable reports whether info is worth pushing: only advisories rated
// High or Critical qualify.
func (v *VenustechCrawler) IsValuable(info *VulnInfo) bool {
	switch info.Severity {
	case High, Critical:
		return true
	default:
		return false
	}
}

// GetUpdate walks the advisory index from page 1 through pageLimit and
// returns every advisory parsed along the way.
//
// The walk stops at the first page that fails or as soon as ctx is done; in
// both cases the advisories gathered from earlier pages are returned together
// with the error.
func (v *VenustechCrawler) GetUpdate(ctx context.Context, pageLimit int) ([]*VulnInfo, error) {
	var collected []*VulnInfo

	for page := 1; page <= pageLimit; page++ {
		// Bail out between pages once the caller has cancelled or timed out.
		if ctxErr := ctx.Err(); ctxErr != nil {
			return collected, ctxErr
		}

		found, err := v.parsePage(ctx, page)
		if err != nil {
			return collected, err
		}
		v.log.Infof("got %d vulns from page %d", len(found), page)
		collected = append(collected, found...)
	}

	return collected, nil
}

// parsePage fetches one page of the Venustech advisory index and parses every
// advisory linked from it.
//
// Page 1 is the index root; later pages live at index_<page>.html beneath it.
// Entries whose link text contains "多个安全漏洞" are skipped. An advisory that
// cannot be fetched or parsed is logged and skipped so that one bad entry does
// not discard the rest of the page.
//
// It returns an error when the index request fails, the HTML cannot be parsed,
// the list selector matches nothing, or ctx is done before the whole page has
// been processed.
func (v *VenustechCrawler) parsePage(ctx context.Context, page int) ([]*VulnInfo, error) {
	rawURL := "https://www.venustech.com.cn/new_type/aqtg/"
	if page > 1 {
		rawURL = fmt.Sprintf("%sindex_%d.html", rawURL, page)
	}

	resp, err := v.client.R().SetContext(ctx).Get(rawURL)
	if err != nil {
		return nil, err
	}

	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Bytes()))
	if err != nil {
		return nil, fmt.Errorf("parsing index page %s: %w", rawURL, err)
	}

	itemsSel := doc.Find("body > div > div.wrapper.clearfloat > div.right.main-content > div > div.main-inner-bt > ul > li > a")
	itemsCnt := itemsSel.Length()
	if itemsCnt == 0 {
		v.log.Errorf("invalid response is \n%s", resp.Dump())
		return nil, fmt.Errorf("goquery find zero vulns")
	}

	results := make([]*VulnInfo, 0, itemsCnt)
	itemsSel.EachWithBreak(func(_ int, s *goquery.Selection) bool {
		// Stop issuing requests once the caller has cancelled; otherwise every
		// remaining item would fail and be logged one by one.
		if ctx.Err() != nil {
			return false
		}

		// Skip Microsoft monthly / Oracle quarterly patch-day roundups.
		if strings.Contains(s.Text(), "多个安全漏洞") {
			return true
		}

		href, ok := s.Attr("href")
		if !ok {
			v.log.Errorf("failed to get href")
			return true
		}

		// Resolve the link against the page it came from, so root-relative,
		// document-relative and absolute hrefs all yield a valid URL.
		target, err := resp.Request.URL.Parse(href)
		if err != nil {
			v.log.Errorf("%s %s", err, href)
			return true
		}
		vulnURL := target.String()

		vulnInfo, err := v.parseSingle(ctx, vulnURL)
		if err != nil {
			v.log.Errorf("%s %s", err, vulnURL)
			return true
		}
		results = append(results, vulnInfo)
		return true
	})

	// Report cancellation instead of returning a silently truncated page.
	if err := ctx.Err(); err != nil {
		return nil, err
	}
	return results, nil
}

// parseSingle downloads the advisory at vulnURL and converts it into a
// VulnInfo.
//
// The first table inside the article body is read as alternating key/value
// cells, from which the title, CVE, disclosure date and severity are taken.
// The description is the text between that table and the next h2, and the
// references are the non-empty text blocks following a "参考链接" heading.
// The UniqueKey is the page's file name without extension plus "_venustech".
//
// It returns an error when the request fails, the HTML cannot be parsed, or
// the table is missing or has an odd number of cells.
func (v *VenustechCrawler) parseSingle(ctx context.Context, vulnURL string) (*VulnInfo, error) {
	v.log.Debugf("parsing vuln %s", vulnURL)
	resp, err := v.client.R().SetContext(ctx).Get(vulnURL)
	if err != nil {
		return nil, err
	}

	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Bytes()))
	if err != nil {
		return nil, err
	}
	contentSel := doc.Find("body > div > div.wrapper.clearfloat > div.right.main-content > div > div > div.news-content.ctn")
	vulnTableSel := contentSel.Find("div > table").First()

	// Read the cells of the first table; they must come in key/value pairs.
	vulnDataSel := vulnTableSel.Find("tbody > tr > td")
	cellCnt := vulnDataSel.Length()
	if cellCnt == 0 || cellCnt%2 != 0 {
		return nil, fmt.Errorf("invalid vuln table")
	}

	var vulnInfo VulnInfo
	// Keys are compared with ordinary and non-breaking spaces removed.
	spaceReplacer := strings.NewReplacer(" ", "", "\u00A0", "")
	for i := 0; i < cellCnt; i += 2 {
		keyText := spaceReplacer.Replace(vulnDataSel.Eq(i).Text())
		valueText := strings.TrimSpace(vulnDataSel.Eq(i + 1).Text())

		switch keyText {
		case "漏洞名称":
			vulnInfo.Title = valueText
		case "CVEID":
			if strings.Contains(valueText, "CVE") {
				// When several CVEs are listed, keep only the first one.
				vulnInfo.CVE = strings.TrimSpace(strings.Split(valueText, "、")[0])
			}
		case "发现时间":
			// Accept the date only when it is a well-formed YYYY-MM-DD value.
			if _, err := time.Parse("2006-01-02", valueText); err == nil {
				vulnInfo.Disclosure = valueText
			}
		case "漏洞等级", "等级":
			// NOTE(review): Critical is never assigned here although IsValuable
			// checks for it — confirm whether the site uses a label above "高危".
			switch valueText {
			case "高危":
				vulnInfo.Severity = High
			case "中危":
				vulnInfo.Severity = Medium
			default:
				// "低危" and any unrecognised label both map to Low.
				vulnInfo.Severity = Low
			}
		}
	}

	if vulnInfo.Title == "" {
		// Fall back to the first heading only; joining every h3 would append
		// section headings to the title. NOTE(review): assumes the article
		// title is the first h3 in the content — confirm against a live page.
		title := strings.TrimSpace(contentSel.Find("h3").First().Text())
		vulnInfo.Title = strings.TrimPrefix(title, "【漏洞通告】")
	}

	// Use the page's file name (without extension) as the unique key.
	filename := path.Base(resp.Request.URL.Path)
	ext := path.Ext(filename)
	vulnInfo.UniqueKey = strings.TrimSuffix(filename, ext) + "_venustech"
	vulnInfo.From = vulnURL

	// Description: everything between the table and the next h2.
	vulnInfo.Description = strings.TrimSpace(vulnTableSel.NextUntil("h2").Text())

	// References: non-empty siblings after the "参考链接" heading, up to the next h2.
	contentSel.Find("div > h3").Each(func(_ int, heading *goquery.Selection) {
		if !strings.Contains(heading.Text(), "参考链接") {
			return
		}
		heading.NextUntil("h2").Each(func(_ int, node *goquery.Selection) {
			if ref := strings.TrimSpace(node.Text()); ref != "" {
				vulnInfo.References = append(vulnInfo.References, ref)
			}
		})
	})

	vulnInfo.Creator = v
	return &vulnInfo, nil
}

// NewVenustechCrawler builds a Venustech crawler with the shared HTTP client
// from util and a child logger tagged "[venustech]".
func NewVenustechCrawler() Grabber {
	crawler := new(VenustechCrawler)
	crawler.client = util.NewHttpClient()
	crawler.log = golog.Child("[venustech]")
	return crawler
}
31 changes: 31 additions & 0 deletions grab/venustech_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package grab

import (
"context"
"testing"
"time"

"github.com/stretchr/testify/require"
)

// TestVenustech crawls the first five index pages and checks that at least
// one advisory comes back and that each one has its key fields populated.
// It calls GetUpdate on a real crawler with a 300-second timeout.
func TestVenustech(t *testing.T) {
	check := require.New(t)
	ctx, cancel := context.WithTimeout(context.Background(), 300*time.Second)
	defer cancel()

	crawler := NewVenustechCrawler()
	vulns, err := crawler.GetUpdate(ctx, 5)
	check.Nil(err)

	for _, item := range vulns {
		t.Logf("get vuln info %s", item)
		check.NotEmpty(item.UniqueKey)
		check.NotEmpty(item.Description)
		check.NotEmpty(item.Title)
		check.NotEmpty(item.Disclosure)
		check.NotEmpty(item.From)
	}
	// Every element was visited above, so the slice length is the visit count.
	check.Greater(len(vulns), 0)
}

0 comments on commit 9bbc19a

Please sign in to comment.