Skip to content

Commit

Permalink
完成正文提取和基本的html解析
Browse files Browse the repository at this point in the history
  • Loading branch information
vanry committed Nov 5, 2016
1 parent aaca290 commit 7962648
Show file tree
Hide file tree
Showing 5 changed files with 518 additions and 1 deletion.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/vendor
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
# readability
Developed from https://github.com/feelinglucky/php-readability
Automatic article content extraction from HTML.
23 changes: 23 additions & 0 deletions composer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
"name": "vanry/readability",
"description": "Automatic article content extraction from html and html parser.",
"keywords": ["readability", "extraction", "html", "parser"],
"type": "library",
"license": "MIT",
"minimum-stability": "dev",
"homepage": "http://vanry.me",
"authors": [
{
"name": "vanry",
"email": "[email protected]"
}
],
"require": {
"php": ">=5.4"
},
"autoload": {
"psr-4": {
"Readability\\": "src/"
}
}
}
20 changes: 20 additions & 0 deletions demo.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
<?php

// Demo script: download one article page and print what Readability extracts from it.

require 'vendor/autoload.php';

$url = 'http://sports.sina.com.cn/basketball/nba/2016-11-05/doc-ifxxnety7387922.shtml';

// file_get_contents() returns false (and raises a warning) when the download fails.
// Stop here instead of handing a boolean to the parser.
$html = file_get_contents($url);

if ($html === false) {
    // Write to php://stderr rather than the STDERR constant so this works outside the CLI SAPI too.
    file_put_contents('php://stderr', 'Failed to fetch '.$url.PHP_EOL);
    exit(1);
}

$readability = new Readability\Readability($html);

// $readability->load($html);

// Print each extracted field on its own line.
echo 'Title: '.$readability->title().PHP_EOL;
echo 'Date: '.$readability->date().PHP_EOL;
echo 'Text: '.$readability->text().PHP_EOL;
echo 'Content: '.$readability->content().PHP_EOL;
echo 'WordCount: '.$readability->wordCount().PHP_EOL;

// image source
print_r($readability->images());
Loading

0 comments on commit 7962648

Please sign in to comment.