Skip to content

Commit

Permalink
优化xiaohongshu下载
Browse files (browse the repository at this point in the history)
  • Loading branch information
iAsuma committed Apr 17, 2024
1 parent 511cc54 commit 85bcc2f
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 1 deletion.
2 changes: 2 additions & 0 deletions extractors/extractors.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ func Extract(u string, option Options) ([]*Data, error) {
}
if u.Host == "haokan.baidu.com" {
domain = "haokan"
} else if u.Host == "xhslink.com" {
domain = "xiaohongshu"
} else {
domain = utils.Domain(u.Host)
}
Expand Down
12 changes: 11 additions & 1 deletion extractors/xiaohongshu/xiaohongshu.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package xiaohongshu

import (
"encoding/json"
neturl "net/url"
"strconv"
"strings"

Expand Down Expand Up @@ -34,7 +35,7 @@ func (e *extractor) Extract(url string, option extractors.Options) ([]*extractor
}

// title
titles := utils.MatchOneOf(html, `,"title":"(.+?)",`)
titles := utils.MatchOneOf(html, `<title>(.*?)</title>`)
if titles == nil || len(titles) != 2 {
return nil, errors.WithStack(extractors.ErrBodyParseFailed)
}
Expand All @@ -51,6 +52,11 @@ func (e *extractor) Extract(url string, option extractors.Options) ([]*extractor
return nil, errors.WithStack(extractors.ErrBodyParseFailed)
}

pUrl, err := neturl.ParseRequestURI(url)
if err != nil {
return nil, errors.WithStack(err)
}

// streams
streams := make(map[string]*extractors.Stream)
var size int64
Expand All @@ -62,6 +68,10 @@ func (e *extractor) Extract(url string, option extractors.Options) ([]*extractor
if err != nil {
continue
}

if pUrl.Host == "xhslink.com" && strings.Contains(u, "sns-video-qc") {
size += 1 // Make sure the link is downloadable and sort the link first with the same size
}
streams[strconv.Itoa(i)] = &extractors.Stream{
Parts: []*extractors.Part{
{
Expand Down

0 comments on commit 85bcc2f

Please sign in to comment.