|
1 |
| -# ParseWiki |
2 |
| -A library that helps parse wikitext data. |
| 1 | +# WikiConnect ParseWiki |
3 | 2 |
|
4 |
| -## Installation |
| 3 | +A powerful PHP library for parsing MediaWiki-style content from raw wiki text. |
5 | 4 |
|
6 |
| -Use composer to install the library and all its dependencies: |
| 5 | +--- |
7 | 6 |
|
8 |
| - composer require wiki-connect/parsewiki |
9 |
| -## Example Usage |
| 7 | +## 📚 Overview |
10 | 8 |
|
11 |
| -### Parse wikitext categories |
12 |
| -```php |
13 |
| -use WikiConnect\ParseWiki\ParserCategorys; |
14 |
| -$parser = new ParserCategorys($wikitext); |
15 |
| -$parser->parse(); |
16 |
| -print_r($parser->getCategorys()); |
| 9 | +This library allows you to extract: |
| 10 | +- Templates (single, multiple, nested) |
| 11 | +- Internal wiki links |
| 12 | +- External links |
| 13 | +- Citations (references) |
| 14 | +- Categories (with or without display text) |
| 15 | + |
| 16 | +Perfect for handling wiki-formatted text in PHP projects. |
| 17 | + |
| 18 | +--- |
| 19 | + |
| 20 | +## 🗂️ Project Structure |
| 21 | + |
| 22 | +- `ParserTemplates`: Parses multiple templates. |
| 23 | +- `ParserTemplate`: Parses a single template. |
| 24 | +- `ParserInternalLinks`: Parses internal wiki links. |
| 25 | +- `ParserExternalLinks`: Parses external links. |
| 26 | +- `ParserCitations`: Parses citations and references. |
| 27 | +- `ParserCategories`: Parses categories from wiki text. |
| 28 | +- `DataModel` classes: |
| 29 | + - `Template` |
| 30 | + - `InternalLink` |
| 31 | + - `ExternalLink` |
| 32 | + - `Citation` |
| 33 | +- `tests/`: Contains PHPUnit test files: |
| 34 | + - `ParserTemplatesTest` |
| 35 | + - `ParserTemplateTest` |
| 36 | + - `ParserInternalLinksTest` |
| 37 | + - `ParserExternalLinksTest` |
| 38 | + - `ParserCitationsTest` |
| 39 | + - `ParserCategoriesTest` |
| 40 | + |
| 41 | +--- |
| 42 | + |
| 43 | +## 🚀 Features |
| 44 | + |
| 45 | +- ✅ Parse single and multiple templates. |
| 46 | +- ✅ Support nested templates. |
| 47 | +- ✅ Handle named and unnamed template parameters. |
| 48 | +- ✅ Extract internal links with or without display text. |
| 49 | +- ✅ Extract external links with or without labels. |
| 50 | +- ✅ Parse citations including attributes and special characters. |
| 51 | +- ✅ Parse categories, including custom namespaces, surrounding whitespace, and special characters. |
| 52 | +- ✅ Full PHPUnit test coverage. |
| 53 | + |
| 54 | +--- |
| 55 | + |
| 56 | +## ⚙️ Requirements |
| 57 | + |
| 58 | +- PHP 8.0 or higher |
| 59 | +- PHPUnit 9 or higher (development only, for running the test suite) |
| 60 | + |
| 61 | +--- |
| 62 | + |
| 63 | +## 💻 Installation |
| 64 | + |
| 65 | +```bash |
| 66 | +composer require wiki-connect/parsewiki |
17 | 67 | ```
|
18 |
| -### Parse wikitext citations |
| 68 | + |
| 69 | +Then load Composer's autoloader (`require 'vendor/autoload.php';`) so that classes in the `WikiConnect\ParseWiki` namespace are resolved through PSR-4 autoloading. |
| 70 | + |
| 71 | +--- |
| 72 | + |
| 73 | +## 🧪 Running Tests |
| 74 | + |
| 75 | +```bash |
| 76 | +vendor/bin/phpunit tests |
| 77 | +``` |
| 78 | + |
| 79 | +### Test Coverage |
| 80 | +- **Templates:** Single, multiple, nested, named/unnamed parameters. |
| 81 | +- **Internal Links:** Simple, with display text, special characters. |
| 82 | +- **External Links:** With/without labels, multiple links, whitespace handling. |
| 83 | +- **Citations:** With/without attributes, special characters. |
| 84 | +- **Categories:** Simple, with display text, custom namespaces, whitespace, special characters. |
| 85 | + |
| 86 | +--- |
| 87 | + |
| 88 | +## ✨ Example Usage |
| 89 | + |
| 90 | +### Parsing Templates |
| 91 | + |
19 | 92 | ```php
|
20 |
| -use WikiConnect\ParseWiki\ParserCitations; |
21 |
| -$parser = new ParserCitations($wikitext); |
22 |
| -$parser->parse($parser->getCitations()); |
| 93 | +use WikiConnect\ParseWiki\ParserTemplates; |
| 94 | + |
| 95 | +$text = '{{Infobox person|name=John Doe|birth_date=1990-01-01}}'; |
| 96 | + |
| 97 | +$parser = new ParserTemplates($text); |
| 98 | +$templates = $parser->getTemplates(); |
| 99 | + |
| 100 | +foreach ($templates as $template) { |
| 101 | + echo $template->getName(); |
| 102 | + print_r($template->getParameters()); |
| 103 | +} |
23 | 104 | ```
|
24 |
| -### Parse wikitext internal links |
| 105 | + |
| 106 | +### Parsing Internal Links |
| 107 | + |
25 | 108 | ```php
|
26 | 109 | use WikiConnect\ParseWiki\ParserInternalLinks;
|
27 |
| -$parser = new ParserInternalLinks($wikitext); |
28 |
| -$parser->parse($parser->getLinks()); |
| 110 | + |
| 111 | +$text = 'See [[Main Page|the main page]] and [[Help]].'; |
| 112 | + |
| 113 | +$parser = new ParserInternalLinks($text); |
| 114 | +$links = $parser->getTargets(); |
| 115 | + |
| 116 | +foreach ($links as $link) { |
| 117 | + echo 'Target: ' . $link->getTarget() . PHP_EOL; |
| 118 | + echo 'Text: ' . ($link->getText() ?? $link->getTarget()) . PHP_EOL; |
| 119 | +} |
29 | 120 | ```
|
30 |
| -### Parse wikitext external links |
| 121 | + |
| 122 | +### Parsing External Links |
| 123 | + |
31 | 124 | ```php
|
32 | 125 | use WikiConnect\ParseWiki\ParserExternalLinks;
|
33 |
| -$parser = new ParserExternalLinks($wikitext); |
34 |
| -$parser->parse($parser->getLinks()); |
| 126 | + |
| 127 | +$text = 'Visit [https://example.com Example Site] and [https://nolabel.com].'; |
| 128 | + |
| 129 | +$parser = new ParserExternalLinks($text); |
| 130 | +$links = $parser->getLinks(); |
| 131 | + |
| 132 | +foreach ($links as $link) { |
| 133 | + echo 'URL: ' . $link->getLink() . PHP_EOL; |
| 134 | + echo 'Label: ' . ($link->getText() ?: 'No label') . PHP_EOL; |
| 135 | +} |
35 | 136 | ```
|
36 |
| -### Parse wikitext template |
| 137 | + |
| 138 | +### Parsing Citations |
| 139 | + |
37 | 140 | ```php
|
38 |
| -use WikiConnect\ParseWiki\ParserTemplate; |
39 |
| -$parser = new ParserTemplate($wikitext); |
40 |
| -$parser->parse($parser->getTemplate()); |
| 141 | +use WikiConnect\ParseWiki\ParserCitations; |
| 142 | + |
| 143 | +$text = 'Some text with a citation.<ref name="source">This is a citation</ref>'; |
| 144 | + |
| 145 | +$parser = new ParserCitations($text); |
| 146 | +$citations = $parser->getCitations(); |
| 147 | + |
| 148 | +foreach ($citations as $citation) { |
| 149 | + echo 'Content: ' . $citation->getContent() . PHP_EOL; |
| 150 | + echo 'Attributes: ' . $citation->getAttributes() . PHP_EOL; |
| 151 | +} |
41 | 152 | ```
|
42 |
| -### Parse wikitext templates |
| 153 | + |
| 154 | +### Parsing Categories |
| 155 | + |
43 | 156 | ```php
|
44 |
| -use WikiConnect\ParseWiki\ParserTemplates; |
45 |
| -$parser = new ParserTemplates($wikitext); |
46 |
| -$parser->parse($parser->getTemplates()); |
| 157 | +use WikiConnect\ParseWiki\ParserCategories; |
| 158 | + |
| 159 | +$text = 'Some text [[Category:Science]] and [[Category:Math|Displayed]].'; |
| 160 | + |
| 161 | +$parser = new ParserCategories($text); |
| 162 | +$categories = $parser->getCategories(); |
| 163 | + |
| 164 | +foreach ($categories as $category) { |
| 165 | + echo 'Category: ' . $category . PHP_EOL; |
| 166 | +} |
47 | 167 | ```
|
| 168 | + |
| 169 | +--- |
| 170 | + |
| 171 | +## 🙌 Author |
| 172 | + |
| 173 | +Developed with ❤️ by Gerges. |
0 commit comments