1
+ using System . Net ;
2
+ using System . Text . RegularExpressions ;
3
+ using HtmlAgilityPack ;
4
+ using Soenneker . Utils . String . NeedlemanWunsch ;
5
+ using Tranga . Jobs ;
6
+
7
+ namespace Tranga . MangaConnectors ;
8
+
9
/// <summary>
/// Manga connector for https://weebcentral.com.
/// Pages are fetched through a <see cref="ChromiumDownloadClient"/> and scraped with
/// HtmlAgilityPack XPath queries.
/// </summary>
public class Weebcentral : MangaConnector
{
    private readonly string _baseUrl = "https://weebcentral.com";

    // Words removed from titles before similarity scoring (see ToFilteredString).
    private readonly string[] _filterWords =
        { "a", "the", "of", "as", "to", "no", "for", "on", "with", "be", "and", "in", "wa", "at", "ni" };

    // Built once instead of on every call.
    // The series id is the first path segment after /series/; a trailing "/slug" is optional so that
    // URLs built by GetMangaFromId (which have no slug) are matched as well.
    private static readonly Regex PublicationIdRex = new(@"https:\/\/weebcentral\.com\/series\/(\w*)");
    private static readonly Regex ChapterIdRex = new(@"https:\/\/weebcentral\.com\/chapters\/(\w*)");

    // Captures the last space-separated number in a chapter label, including a decimal part ("10.5").
    private static readonly Regex ChapterNumberRex = new(@".* (\d+(?:\.\d+)?)");

    /// <summary>Creates the connector and sets up a Chromium based download client.</summary>
    /// <param name="clone">Shared global state handed to the base connector and the download client.</param>
    public Weebcentral(GlobalBase clone) : base(clone, "Weebcentral", ["en"])
    {
        downloadClient = new ChromiumDownloadClient(clone);
    }

    /// <summary>Returns true for HTTP status codes in the 2xx range.</summary>
    private static bool IsSuccessStatus(int statusCode) => statusCode is >= 200 and < 300;

    /// <summary>Returns the node's inner text with HTML entities decoded and surrounding whitespace removed.</summary>
    private static string DecodedText(HtmlNode node) => WebUtility.HtmlDecode(node.InnerText).Trim();

    /// <summary>
    /// Searches the site for publications matching <paramref name="publicationTitle"/>.
    /// </summary>
    /// <param name="publicationTitle">Search term; it is URL-encoded before being put into the query string.</param>
    /// <returns>The publications parsed from the search result page, or an empty array if the request failed.</returns>
    public override Manga[] GetManga(string publicationTitle = "")
    {
        Log($"Searching Publications. Term=\"{publicationTitle}\"");
        const int limit = 32; // How many values we want returned at once
        const int offset = 0; // "Page"

        // The term is user input: escape it so characters such as '&', '#' or '+' cannot break the query string.
        var encodedTitle = Uri.EscapeDataString(publicationTitle ?? string.Empty);
        var requestUrl =
            $"{_baseUrl}/search/data?limit={limit}&offset={offset}&text={encodedTitle}&sort=Best+Match&order=Ascending&official=Any&display_mode=Minimal%20Display";

        var requestResult = downloadClient.MakeRequest(requestUrl, RequestType.Default);
        if (!IsSuccessStatus((int)requestResult.statusCode) || requestResult.htmlDocument is null)
        {
            Log($"Failed to retrieve search: {requestResult.statusCode}");
            return [];
        }

        var publications = ParsePublicationsFromHtml(requestResult.htmlDocument);
        Log($"Retrieved {publications.Length} publications. Term=\"{publicationTitle}\"");

        return publications;
    }

    /// <summary>
    /// Extracts the series links from a search result page and loads every linked publication.
    /// </summary>
    /// <param name="document">Search result page.</param>
    /// <returns>The distinct publications that could be loaded; empty if the page contains no series links.</returns>
    private Manga[] ParsePublicationsFromHtml(HtmlDocument document)
    {
        // SelectNodes returns null (not an empty collection) when nothing matches.
        var linkNodes = document.DocumentNode.SelectNodes("/html/body/article/a[@class='link link-hover']");
        if (linkNodes is null)
        {
            return [];
        }

        var urls = linkNodes
            .Select(node => node.GetAttributeValue("href", ""))
            .Where(href => !string.IsNullOrEmpty(href))
            .ToList();

        HashSet<Manga> ret = new();
        foreach (var url in urls)
        {
            if (GetMangaFromUrl(url) is { } manga)
            {
                ret.Add(manga);
            }
        }

        return ret.ToArray();
    }

    /// <summary>
    /// Loads a single publication from its series page.
    /// </summary>
    /// <param name="url">Series page URL, with or without a title slug after the id.</param>
    /// <returns>The parsed publication, or null if the page could not be retrieved.</returns>
    public override Manga? GetMangaFromUrl(string url)
    {
        var publicationId = PublicationIdRex.Match(url).Groups[1].Value;
        if (publicationId.Length == 0)
        {
            Log($"Could not extract publication id from url {url}");
        }

        var requestResult = downloadClient.MakeRequest(url, RequestType.MangaInfo);
        if (IsSuccessStatus((int)requestResult.statusCode) && requestResult.htmlDocument is not null)
        {
            return ParseSinglePublicationFromHtml(requestResult.htmlDocument, publicationId, url);
        }

        return null;
    }

    /// <summary>
    /// Builds a <see cref="Manga"/> from a series page, stores its cover in the cache and adds the
    /// publication to the manga cache.
    /// </summary>
    /// <param name="document">Series page.</param>
    /// <param name="publicationId">Site id of the series.</param>
    /// <param name="websiteUrl">URL the page was loaded from.</param>
    /// <returns>The parsed publication. Fields that are not found on the page fall back to defaults.</returns>
    private Manga ParseSinglePublicationFromHtml(HtmlDocument document, string publicationId, string websiteUrl)
    {
        var root = document.DocumentNode;

        var posterNode = root.SelectSingleNode("//section[@class='flex items-center justify-center']/picture/img");
        var posterUrl = posterNode?.GetAttributeValue("src", "") ?? "";
        var coverFileNameInCache = SaveCoverImageToCache(posterUrl, publicationId, RequestType.MangaCover);

        // InnerText keeps HTML entities ("&amp;"), so all human-readable text goes through DecodedText.
        var titleNode = root.SelectSingleNode("//section/h1");
        var sortName = titleNode is null ? "Undefined" : DecodedText(titleNode);

        HtmlNode[] authorsNodes = root.SelectNodes("//ul/li[strong/text() = 'Author(s): ']/span")?.ToArray() ?? [];
        var authors = authorsNodes.Select(DecodedText).ToList();

        HtmlNode[] genreNodes = root.SelectNodes("//ul/li[strong/text() = 'Tags(s): ']/span")?.ToArray() ?? [];
        HashSet<string> tags = genreNodes.Select(DecodedText).ToHashSet();

        var statusNode = root.SelectSingleNode("//ul/li[strong/text() = 'Status: ']/a");
        var status = statusNode is null ? "" : DecodedText(statusNode);
        var releaseStatus = Manga.ReleaseStatusByte.Unreleased;
        // Invariant lower-casing: culture-sensitive ToLower() maps 'I' differently under e.g. tr-TR.
        switch (status.ToLowerInvariant())
        {
            case "cancelled":
                releaseStatus = Manga.ReleaseStatusByte.Cancelled;
                break;
            case "hiatus":
                releaseStatus = Manga.ReleaseStatusByte.OnHiatus;
                break;
            case "complete":
                releaseStatus = Manga.ReleaseStatusByte.Completed;
                break;
            case "ongoing":
                releaseStatus = Manga.ReleaseStatusByte.Continuing;
                break;
            default:
                // Only report a problem when the status is actually missing or unknown.
                Log("unable to parse status");
                break;
        }

        // Scraped text may be missing or non-numeric; fall back to 0 instead of throwing.
        var yearNode = root.SelectSingleNode("//ul/li[strong/text() = 'Released: ']/span");
        if (!int.TryParse(yearNode?.InnerText, out var year))
        {
            year = 0;
        }

        var descriptionNode = root.SelectSingleNode("//ul/li[strong/text() = 'Description']/p");
        var description = descriptionNode is null ? "Undefined" : DecodedText(descriptionNode);

        HtmlNode[] altTitleNodes =
            root.SelectNodes("//ul/li[strong/text() = 'Associated Name(s)']/ul/li")?.ToArray() ?? [];
        Dictionary<string, string> altTitles = new(), links = new();
        for (var i = 0; i < altTitleNodes.Length; i++)
        {
            // Alternative titles are keyed by their position on the page.
            altTitles.Add(i.ToString(), DecodedText(altTitleNodes[i]));
        }

        var originalLanguage = "";

        Manga manga = new(sortName, authors, description, altTitles, tags.ToArray(), posterUrl,
            coverFileNameInCache, links,
            year, originalLanguage, publicationId, releaseStatus, websiteUrl);
        AddMangaToCache(manga);
        return manga;
    }

    /// <summary>Loads a publication by its site id.</summary>
    /// <param name="publicationId">Site id of the series.</param>
    /// <returns>The parsed publication, or null if the page could not be retrieved.</returns>
    public override Manga? GetMangaFromId(string publicationId)
    {
        return GetMangaFromUrl($"{_baseUrl}/series/{publicationId}");
    }

    /// <summary>Lower-cases <paramref name="input"/> and removes every word listed in the filter-word list.</summary>
    private string ToFilteredString(string input)
    {
        return string.Join(' ', input.ToLowerInvariant().Split(' ').Where(word => !_filterWords.Contains(word)));
    }

    /// <summary>
    /// Scores every search result against <paramref name="publicationTitle"/> and returns at most ten of them,
    /// ordered by ascending average score.
    /// </summary>
    /// <param name="publicationTitle">Title that was searched for.</param>
    /// <param name="unfilteredSearchResults">Raw search results.</param>
    /// <returns>Up to ten results with the lowest average score.</returns>
    private SearchResult[] FilteredResults(string publicationTitle, SearchResult[] unfilteredSearchResults)
    {
        // Loop-invariant: computed once instead of once per result.
        var filteredPublicationString = ToFilteredString(publicationTitle);

        int AverageScore(SearchResult sr)
        {
            List<int> scores = new()
            {
                NeedlemanWunschStringUtil.CalculateSimilarity(ToFilteredString(sr.s ?? ""), filteredPublicationString)
            };
            foreach (var alternative in sr.a ?? [])
            {
                scores.Add(NeedlemanWunschStringUtil.CalculateSimilarity(ToFilteredString(alternative),
                    filteredPublicationString));
            }

            return scores.Sum() / scores.Count;
        }

        // NOTE(review): ascending order keeps the LOWEST scores. If CalculateSimilarity returns a
        // "higher is more similar" value this should be OrderByDescending - confirm against the library.
        // A projected sequence is used instead of a dictionary so duplicate results cannot throw.
        return unfilteredSearchResults
            .Select(sr => (Result: sr, Score: AverageScore(sr)))
            .OrderBy(entry => entry.Score)
            .Take(10)
            .Select(entry => entry.Result)
            .ToArray();
    }

    /// <summary>
    /// Retrieves the full chapter list of a publication.
    /// </summary>
    /// <param name="manga">Publication whose chapters are requested.</param>
    /// <param name="language">Not used by this connector.</param>
    /// <returns>The chapters in sorted order, or an empty array if the list could not be retrieved.</returns>
    public override Chapter[] GetChapters(Manga manga, string language = "en")
    {
        Log($"Getting chapters {manga}");
        var requestUrl = $"{_baseUrl}/series/{manga.publicationId}/full-chapter-list";
        var requestResult = downloadClient.MakeRequest(requestUrl, RequestType.Default);
        if (!IsSuccessStatus((int)requestResult.statusCode) || requestResult.htmlDocument is null)
        {
            return Array.Empty<Chapter>();
        }

        // Return Chapters ordered by Chapter-Number
        var chapters = ParseChaptersFromHtml(manga, requestResult.htmlDocument);
        Log($"Got {chapters.Count} chapters. {manga}");
        return chapters.Order().ToArray();
    }

    /// <summary>
    /// Builds chapters from every absolute link on the chapter list page whose label contains a chapter number.
    /// </summary>
    /// <param name="manga">Publication the chapters belong to.</param>
    /// <param name="document">Chapter list page.</param>
    /// <returns>The parsed chapters in reverse page order; empty if the page has no body.</returns>
    private List<Chapter> ParseChaptersFromHtml(Manga manga, HtmlDocument document)
    {
        List<Chapter> ret = new();

        var chaptersWrapper = document.DocumentNode.SelectSingleNode("/html/body");
        if (chaptersWrapper is null)
        {
            return ret;
        }

        foreach (var anchor in chaptersWrapper.Descendants("a"))
        {
            var url = anchor.GetAttributeValue("href", "");

            // Only absolute links can be chapter links; everything else is skipped.
            if (!url.StartsWith("https://", StringComparison.Ordinal) &&
                !url.StartsWith("http://", StringComparison.Ordinal))
            {
                continue;
            }

            var chapterLabel = anchor.SelectSingleNode("span[@class='grow flex items-center gap-2']/span")?.InnerText;
            if (chapterLabel is null)
            {
                continue;
            }

            var chapterNumberMatch = ChapterNumberRex.Match(chapterLabel);
            if (!chapterNumberMatch.Success)
            {
                continue;
            }

            var idMatch = ChapterIdRex.Match(url);
            var id = idMatch.Success ? idMatch.Groups[1].Value : null;

            ret.Add(new Chapter(manga, null, null, chapterNumberMatch.Groups[1].Value, url, id));
        }

        ret.Reverse();
        return ret;
    }

    /// <summary>
    /// Loads the chapter page, collects its image URLs and hands them to DownloadChapterImages.
    /// </summary>
    /// <param name="chapter">Chapter to download.</param>
    /// <param name="progressToken">Optional token used to observe and signal cancellation.</param>
    /// <returns>
    /// <see cref="HttpStatusCode.RequestTimeout"/> if cancellation was requested or no document was returned,
    /// the response status if the chapter page request was not successful,
    /// otherwise the result of DownloadChapterImages.
    /// </returns>
    public override HttpStatusCode DownloadChapter(Chapter chapter, ProgressToken? progressToken = null)
    {
        if (progressToken?.cancellationRequested ?? false)
        {
            progressToken.Cancel();
            return HttpStatusCode.RequestTimeout;
        }

        var chapterParentManga = chapter.parentManga;
        Log($"Retrieving chapter-info {chapter} {chapterParentManga}");

        var requestResult = downloadClient.MakeRequest(chapter.url, RequestType.Default);

        // Do not treat an error page as a chapter: it would yield an empty image list.
        var statusCode = (int)requestResult.statusCode;
        if (!IsSuccessStatus(statusCode))
        {
            progressToken?.Cancel();
            return (HttpStatusCode)statusCode;
        }

        if (requestResult.htmlDocument is null)
        {
            progressToken?.Cancel();
            return HttpStatusCode.RequestTimeout;
        }

        var document = requestResult.htmlDocument;

        var imageNodes =
            document.DocumentNode.SelectNodes($"//section[@hx-get='{chapter.url}/images']/img")?.ToArray() ?? [];
        var urls = imageNodes.Select(imgNode => imgNode.GetAttributeValue("src", "")).ToArray();

        return DownloadChapterImages(urls, chapter, RequestType.MangaImage, progressToken: progressToken);
    }

    // Search result record consumed by FilteredResults: "s" is scored as the primary title and
    // "a" as a list of alternative titles; "i" is not read anywhere in this class.
    private struct SearchResult
    {
        public string i { get; set; }
        public string s { get; set; }
        public string[] a { get; set; }
    }
}
0 commit comments