diff --git a/extractors/extractors.go b/extractors/extractors.go index 2954d32ed..76937d83a 100644 --- a/extractors/extractors.go +++ b/extractors/extractors.go @@ -41,6 +41,8 @@ func Extract(u string, option Options) ([]*Data, error) { } if u.Host == "haokan.baidu.com" { domain = "haokan" + } else if u.Host == "xhslink.com" { + domain = "xiaohongshu" } else { domain = utils.Domain(u.Host) } diff --git a/extractors/xiaohongshu/xiaohongshu.go b/extractors/xiaohongshu/xiaohongshu.go index 0a255c909..103b79be0 100644 --- a/extractors/xiaohongshu/xiaohongshu.go +++ b/extractors/xiaohongshu/xiaohongshu.go @@ -2,6 +2,7 @@ package xiaohongshu import ( "encoding/json" + neturl "net/url" "strconv" "strings" @@ -34,7 +35,7 @@ func (e *extractor) Extract(url string, option extractors.Options) ([]*extractor } // title - titles := utils.MatchOneOf(html, `,"title":"(.+?)",`) + titles := utils.MatchOneOf(html, `(.*?)`) if titles == nil || len(titles) != 2 { return nil, errors.WithStack(extractors.ErrBodyParseFailed) } @@ -51,6 +52,11 @@ func (e *extractor) Extract(url string, option extractors.Options) ([]*extractor return nil, errors.WithStack(extractors.ErrBodyParseFailed) } + pUrl, err := neturl.ParseRequestURI(url) + if err != nil { + return nil, errors.WithStack(err) + } + // streams streams := make(map[string]*extractors.Stream) var size int64 @@ -62,6 +68,10 @@ func (e *extractor) Extract(url string, option extractors.Options) ([]*extractor if err != nil { continue } + + if pUrl.Host == "xhslink.com" && strings.Contains(u, "sns-video-qc") { + size += 1 // Make sure the link is downloadable and sort the link first with the same size + } streams[strconv.Itoa(i)] = &extractors.Stream{ Parts: []*extractors.Part{ {