From 4beb998e4947d84c152c8661efd3e5fd39dcc2fa Mon Sep 17 00:00:00 2001
From: Brian Schrader
Date: Tue, 25 Jan 2022 17:19:20 -0800
Subject: [PATCH] Adds RSS Item Categories as ParsedArticle Tags

---
 Sources/ObjC/RSParsedArticle.h                        |  2 ++
 Sources/ObjC/RSParsedArticle.m                        | 12 ++++++++++++
 Sources/ObjC/RSRSSParser.m                            | 14 ++++++++++++++
 .../Swift/Feeds/XML/RSParsedFeedTransformer.swift     |  3 ++-
 Tests/RSParserTests/RSSParserTests.swift              |  8 ++++++++
 5 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/Sources/ObjC/RSParsedArticle.h b/Sources/ObjC/RSParsedArticle.h
index a2bfb31..37fbe9e 100755
--- a/Sources/ObjC/RSParsedArticle.h
+++ b/Sources/ObjC/RSParsedArticle.h
@@ -25,12 +25,14 @@
 @property (nonatomic, nullable) NSString *permalink;
 @property (nonatomic, nullable) NSSet<RSParsedAuthor *> *authors;
 @property (nonatomic, nullable) NSSet<RSParsedEnclosure *> *enclosures;
+@property (nonatomic, nullable) NSSet<NSString *> *categories; // category strings added via addCategory:; nil until the first one is added
 @property (nonatomic, nullable) NSDate *datePublished;
 @property (nonatomic, nullable) NSDate *dateModified;
 @property (nonatomic, nonnull) NSDate *dateParsed;
 @property (nonatomic, nullable) NSString *language;
 
 - (void)addEnclosure:(RSParsedEnclosure *_Nonnull)enclosure;
+- (void)addCategory:(NSString *_Nonnull)category; // adds one category string to categories
 - (void)addAuthor:(RSParsedAuthor *_Nonnull)author;
 
 @end
diff --git a/Sources/ObjC/RSParsedArticle.m b/Sources/ObjC/RSParsedArticle.m
index b94930a..38d3475 100755
--- a/Sources/ObjC/RSParsedArticle.m
+++ b/Sources/ObjC/RSParsedArticle.m
@@ -60,6 +60,18 @@ - (void)addAuthor:(RSParsedAuthor *)author {
 	}
 }
 
+#pragma mark - Categories
+
+- (void)addCategory:(NSString *)category {
+
+	if (self.categories) {
+		self.categories = [self.categories setByAddingObject:category]; // immutable set: replace it with a new set that includes category
+	}
+	else {
+		self.categories = [NSSet setWithObject:category]; // first category: create the set on demand
+	}
+}
+
 #pragma mark - articleID
 
 - (NSString *)articleID {
diff --git a/Sources/ObjC/RSRSSParser.m b/Sources/ObjC/RSRSSParser.m
index 455320a..d243949 100755
--- a/Sources/ObjC/RSRSSParser.m
+++ b/Sources/ObjC/RSRSSParser.m
@@ -171,6 +171,9 @@ - (RSParsedFeed *)parseFeed {
 static const char *kLanguage = "language";
 static const NSInteger kLanguageLength = 9;
 
+static const char *kCategory = "category";
+static const NSInteger kCategoryLength = 9; // strlen("category") + 1, same convention as kLanguageLength
+
 #pragma mark - Parsing
 
 - (void)parse {
@@ -240,6 +243,14 @@ - (void)addDCElement:(const xmlChar *)localName {
 	}
 }
 
+- (void)addCategoryWithString:(NSString *)categoryString {
+
+	if (RSParserStringIsEmpty(categoryString)) {
+		return; // ignore category elements that carry no text
+	}
+
+	[self.currentArticle addCategory:categoryString];
+}
 
 - (void)addGuid {
 
@@ -356,6 +367,9 @@ - (void)addArticleElement:(const xmlChar *)localName prefix:(const xmlChar *)pre
 	else if (RSSAXEqualTags(localName, kLink, kLinkLength)) {
 		self.currentArticle.link = [self urlString:[self currentString]];
 	}
+	else if (RSSAXEqualTags(localName, kCategory, kCategoryLength)) {
+		[self addCategoryWithString:[self currentString]];
+	}
 	else if (RSSAXEqualTags(localName, kDescription, kDescriptionLength)) {
 
 		if (!self.currentArticle.body) {
diff --git a/Sources/Swift/Feeds/XML/RSParsedFeedTransformer.swift b/Sources/Swift/Feeds/XML/RSParsedFeedTransformer.swift
index 4d680ac..89eb4b1 100644
--- a/Sources/Swift/Feeds/XML/RSParsedFeedTransformer.swift
+++ b/Sources/Swift/Feeds/XML/RSParsedFeedTransformer.swift
@@ -46,8 +46,9 @@ private extension RSParsedFeedTransformer {
 		let dateModified = parsedArticle.dateModified
 		let authors = parsedAuthors(parsedArticle.authors)
 		let attachments = parsedAttachments(parsedArticle.enclosures)
+		let tags = parsedArticle.categories // RSS item categories are surfaced as the item's tags
 
-		return ParsedItem(syncServiceID: nil, uniqueID: uniqueID, feedURL: parsedArticle.feedURL, url: url, externalURL: externalURL, title: title, language: language, contentHTML: contentHTML, contentText: nil, summary: nil, imageURL: nil, bannerImageURL: nil, datePublished: datePublished, dateModified: dateModified, authors: authors, tags: nil, attachments: attachments)
+		return ParsedItem(syncServiceID: nil, uniqueID: uniqueID, feedURL: parsedArticle.feedURL, url: url, externalURL: externalURL, title: title, language: language, contentHTML: contentHTML, contentText: nil, summary: nil, imageURL: nil, bannerImageURL: nil, datePublished: datePublished, dateModified: dateModified, authors: authors, tags: tags, attachments: attachments)
 	}
 
 	static func parsedAuthors(_ authors: Set<RSParsedAuthor>?) -> Set<ParsedAuthor>? {
diff --git a/Tests/RSParserTests/RSSParserTests.swift b/Tests/RSParserTests/RSSParserTests.swift
index 71384fa..7758c1e 100644
--- a/Tests/RSParserTests/RSSParserTests.swift
+++ b/Tests/RSParserTests/RSSParserTests.swift
@@ -175,6 +175,14 @@ class RSSParserTests: XCTestCase {
 		XCTAssertEqual(parsedFeed.language, "en-US")
 	}
 
+	func testFeedCategoriesAsTags() {
+		let d = parserData("dcrainmaker", "xml", "https://www.dcrainmaker.com/")
+		let parsedFeed = try! FeedParser.parse(d)!
+		for article in parsedFeed.items {
+			XCTAssertNotNil(article.tags) // expects every item in this fixture to yield at least one category
+		}
+	}
+
 //	func testFeedWithGB2312Encoding() {
 //		// This feed has an encoding we don’t run into very often.
 //		// https://github.com/Ranchero-Software/NetNewsWire/issues/1477