Skip to content

Commit

Permalink
add NurembergerNachrichten bridge
Browse files Browse the repository at this point in the history
apply suggested changes and fix regions

put collectData on top

replace self:: with -> for methodcalls
  • Loading branch information
theScrabi committed Aug 7, 2024
1 parent 4faaa79 commit 95c3a52
Showing 1 changed file with 181 additions and 0 deletions.
181 changes: 181 additions & 0 deletions bridges/NurembergerNachrichtenBridge.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
<?php

/**
 * Bridge that turns a regional news listing of nn.de (Nürnberger Nachrichten)
 * into feed items. Every article linked from the listing page is fetched
 * separately so that its text can be embedded in the item content.
 */
class NurembergerNachrichtenBridge extends BridgeAbstract
{
    const MAINTAINER = 'schabi.org';
    const NAME = 'Nürnberger Nachrichten';
    // NOTE(review): presumably seconds (i.e. one hour) - confirm against BridgeAbstract.
    const CACHE_TIMEOUT = 3600;
    const URI = 'https://www.nn.de';
    const DESCRIPTION = 'Bridge for Bavarian regional news site nn.de';
    const PARAMETERS = [ [
        'region' => [
            'name' => 'region',
            'type' => 'list',
            'exampleValue' => 'Nürnberg',
            'title' => 'Select a region',
            // Display name => URL slug used to build the listing URL.
            'values' => [
                'Ansbach' => 'ansbach',
                'Erlangen' => 'erlangen',
                'Erlangen-Höchstadt' => 'erlangen-hoechstadt',
                'Forchheim' => 'forchheim',
                'Fürth' => 'fuerth',
                'Gunzenhausen' => 'gunzenhausen',
                'Neumarkt' => 'neumarkt',
                'Neustadt/Aisch-Bad Windsheim' => 'neustadt-aisch-bad-windsheim',
                'Nürnberg' => 'nuernberg',
                'Nürnberger Land' => 'nuernberger-land',
                'Pegnitz' => 'pegnitz',
                'Roth' => 'roth',
                'Schwabach' => 'schwabach',
                'Weißenburg' => 'weissenburg',
            ],
        ],
        'hideNNPlus' => [
            'name' => 'Hide NN+ articles',
            'type' => 'checkbox',
            'exampleValue' => 'unchecked',
            'title' => 'Hide all paywall articles on NN',
        ],
    ]];

    /**
     * Builds the listing URL for the selected region, downloads it and
     * collects one feed item per article found on it.
     */
    public function collectData()
    {
        $region = $this->getInput('region');
        // These slugs (and the empty one) are addressed below "/region/"
        // instead of directly below the site root.
        if (
            $region === 'neustadt-aisch-bad-windsheim' ||
            $region === 'erlangen-hoechstadt' ||
            $region === ''
        ) {
            $region = 'region/' . $region;
        }
        $url = self::URI . '/' . $region;
        $listSite = getSimpleHTMLDOM($url);

        $this->handleNewsblock($listSite);
    }

    /**
     * Returns an <img> snippet for the first image inside a <picture> node.
     *
     * Images whose URL looks like "/logo-*.png" are skipped
     * (presumably placeholder logos - TODO confirm).
     *
     * @param object $picture DOM node of a <picture> element
     * @return string HTML snippet, or an empty string if there is no usable image
     */
    private function getValidImage($picture): string
    {
        $img = $picture->find('img', 0);
        if ($img) {
            $imgUrl = $img->src;
            if (!preg_match('#/logo-.*\.png#', $imgUrl)) {
                return '<br><img src="' . $imgUrl . '">';
            }
        }
        return '';
    }

    /**
     * Recursively walks the direct children of a node and concatenates the
     * parts that belong to the article body (paragraphs, h3 headings, lists,
     * images), descending into known container elements.
     *
     * @param object|null $rawContent DOM node to walk; null yields an empty string
     * @return string collected HTML
     */
    private function getUseFullContent($rawContent): string
    {
        // A failed find() upstream hands us null; there is nothing to collect then.
        if ($rawContent === null) {
            return '';
        }

        $content = '';
        foreach ($rawContent->children as $element) {
            if (
                ($element->tag === 'p' || $element->tag === 'h3') &&
                $element->class !== 'article__teaser'
            ) {
                // The teaser paragraph is emitted separately by getTeaser().
                $content .= $element;
            } elseif ($element->tag === 'main') {
                $content .= $this->getUseFullContent($element->find('article', 0));
            } elseif ($element->tag === 'header') {
                $content .= $this->getUseFullContent($element);
            } elseif (
                $element->tag === 'div' &&
                !str_contains($element->class, 'article__infobox') &&
                !str_contains($element->class, 'authorinfo')
            ) {
                $content .= $this->getUseFullContent($element);
            } elseif (
                $element->tag === 'section' &&
                (str_contains($element->class, 'article__richtext') ||
                str_contains($element->class, 'article__context'))
            ) {
                $content .= $this->getUseFullContent($element);
            } elseif ($element->tag === 'picture') {
                $content .= $this->getValidImage($element);
            } elseif ($element->tag === 'ul') {
                $content .= $element;
            }
        }
        return $content;
    }

    /**
     * Extracts the teaser paragraph of an article as a normalized <p> element.
     *
     * @param object|null $content DOM node containing the article
     * @return string teaser HTML, or an empty string if there is no teaser
     */
    private function getTeaser($content): string
    {
        if ($content === null) {
            return '';
        }
        $teaser = $content->find('p[class=article__teaser]', 0);
        if ($teaser === null) {
            return '';
        }
        // Collapse runs of spaces left over from the page markup.
        $teaser = preg_replace('/[ ]{2,}/', ' ', $teaser->plaintext);
        return '<p class="article__teaser">' . $teaser . '</p>';
    }

    /**
     * Builds a feed item from a downloaded article page.
     *
     * Optional fields (author, timestamp, title) are only set when the
     * corresponding element exists on the page.
     *
     * @param object $article parsed article page
     * @param string $link    absolute URL of the article
     * @return array feed item
     */
    private function parseArticle($article, $link): array
    {
        $item = [];
        defaultLinkTo($article, self::URI);
        $item['uri'] = $link;

        // The second ".article__author" match is used as the author name.
        $author = $article->find('.article__author', 1);
        if ($author !== null) {
            $item['author'] = trim($author->plaintext);
        }

        $createdAt = $article->find('[class=article__release]', 0);
        if ($createdAt) {
            $timestamp = strtotime(str_replace('Uhr', '', $createdAt->plaintext));
            // strtotime() returns false for unparsable input; do not store that.
            if ($timestamp !== false) {
                $item['timestamp'] = $timestamp;
            }
        }

        // Prefer the h2 headline and fall back to h3.
        $heading = $article->find('h2', 0) ?? $article->find('h3', 0);
        if ($heading !== null) {
            $item['title'] = $heading->innertext;
        }
        $item['content'] = '';

        if ($article->find('section[class*=article__richtext]', 0) === null) {
            // No rich text section: fall back to the first teaser paragraph, if any.
            $teaserModule = $article->find('div[class*=modul__teaser]', 0);
            if ($teaserModule !== null) {
                $item['content'] .= $teaserModule->find('p', 0);
            }
        } else {
            $content = $article->find('article', 0);
            // Change the order of the article teaser in order to show it on
            // top of the title image. If we didn't do this, some RSS programs
            // would show the subtitle of the title image as teaser instead
            // of the actual article teaser.
            $item['content'] .= $this->getTeaser($content);
            $item['content'] .= $this->getUseFullContent($content);
        }

        return $item;
    }

    /**
     * Tells whether an article page is marked as an NN+ (paywall) article,
     * i.e. whether the header of its main article element contains
     * "icon-nnplus".
     *
     * @param object $articleContent parsed article page
     * @return bool false as well when the expected elements are missing
     */
    private function isNNPlusArticle($articleContent): bool
    {
        $article = $articleContent->find('article[id=article]', 0);
        if ($article === null) {
            return false;
        }
        $header = $article->find('header', 0);
        if ($header === null) {
            return false;
        }
        return str_contains((string) $header, 'icon-nnplus');
    }

    /**
     * Downloads every article linked from the listing page and appends the
     * resulting feed items to $this->items.
     *
     * @param object $listSite parsed listing page
     * @throws \RuntimeException if the listing page has no <main> element
     */
    private function handleNewsblock($listSite)
    {
        $main = $listSite ? $listSite->find('main', 0) : null;
        if ($main === null) {
            throw new \RuntimeException('Unable to find the article list on the requested NN page');
        }

        $hideNNPlus = $this->getInput('hideNNPlus');

        foreach ($main->find('article') as $article) {
            $anchor = $article->find('a', 0);
            if ($anchor === null) {
                // Teaser without a link: nothing to fetch.
                continue;
            }
            $url = urljoin(self::URI, $anchor->href);

            $articleContent = getSimpleHTMLDOM($url);
            if (!$articleContent) {
                continue;
            }

            try {
                // exclude nn+ articles if desired
                if ($hideNNPlus && $this->isNNPlusArticle($articleContent)) {
                    continue;
                }
                $item = $this->parseArticle($articleContent, $url);
            } finally {
                // Always release the parsed DOM, including for skipped articles.
                $articleContent->clear();
            }

            $this->items[] = $item;
        }
    }
}

0 comments on commit 95c3a52

Please sign in to comment.