Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
andrewdalpino committed Jan 24, 2022
0 parents commit 39321cb
Show file tree
Hide file tree
Showing 20 changed files with 1,240 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
* text=auto

*.php text eol=lf
1 change: 1 addition & 0 deletions .github/FUNDING.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
github: [andrewdalpino]
39 changes: 39 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Continuous integration workflow: triggered on every push and pull request.
name: "Code Checks"

on: [push, pull_request]

jobs:
Build:
name: PHP ${{ matrix.php-versions }} on ${{ matrix.operating-system }}
runs-on: ${{ matrix.operating-system }}
strategy:
# One job is spawned for each operating system / PHP version combination below.
matrix:
operating-system: [windows-latest, ubuntu-latest, macos-latest]
php-versions: ['7.4', '8.0', '8.1']

steps:
- name: Checkout
uses: actions/checkout@v2

# Installs the PHP version selected by the matrix along with the listed extensions.
- name: Setup PHP
uses: shivammathur/setup-php@v2
with:
php-version: ${{ matrix.php-versions }}
tools: pecl
extensions: mbstring, fileinfo
# A memory_limit of -1 tells PHP not to enforce a memory limit.
ini-values: memory_limit=-1

- name: Validate composer.json
run: composer validate

- name: Install Dependencies
run: composer install

# NOTE(review): `analyze`, `test` and `check` are presumably scripts declared in
# composer.json, which is not visible here - confirm they exist.
- name: Static Analysis
run: composer analyze

- name: Unit Tests
run: composer test

- name: Coding Style
run: composer check
10 changes: 10 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
/vendor
composer.lock
.phpunit.result.cache
.php-cs-fixer.cache
Thumbs.db
.DS_Store
debug.log
/.idea
/.vscode
/.vs
122 changes: 122 additions & 0 deletions .php_cs.dist.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
<?php

use PhpCsFixer\Finder;
use PhpCsFixer\Config;

// Look for files to fix starting from the directory that holds this config.
$finder = Finder::create()->in(__DIR__);

// The coding style rules, keyed by fixer name. A value of `true` enables a
// fixer with its defaults, `false` disables it, and an array configures it.
$rules = [
    '@PSR2' => true,
    'align_multiline_comment' => true,
    'array_syntax' => ['syntax' => 'short'],
    'backtick_to_shell_exec' => true,
    'binary_operator_spaces' => true,
    'blank_line_after_namespace' => true,
    'blank_line_after_opening_tag' => true,
    'blank_line_before_statement' => [
        'statements' => [
            'break', 'case', 'continue', 'declare', 'default', 'do', 'for',
            'if', 'foreach', 'return', 'switch', 'try', 'while',
        ],
    ],
    'braces' => [
        'allow_single_line_closure' => false,
        'position_after_control_structures' => 'same',
        'position_after_functions_and_oop_constructs' => 'next',
    ],
    'cast_spaces' => ['space' => 'single'],
    'combine_consecutive_issets' => true,
    'combine_consecutive_unsets' => true,
    'compact_nullable_typehint' => true,
    'concat_space' => ['spacing' => 'one'],
    'fully_qualified_strict_types' => true,
    'function_typehint_space' => true,
    'increment_style' => ['style' => 'pre'],
    'linebreak_after_opening_tag' => true,
    'list_syntax' => ['syntax' => 'short'],
    'lowercase_cast' => true,
    'lowercase_static_reference' => true,
    'magic_constant_casing' => true,
    'magic_method_casing' => true,
    'multiline_comment_opening_closing' => true,
    'multiline_whitespace_before_semicolons' => [
        'strategy' => 'no_multi_line',
    ],
    'native_function_casing' => true,
    'native_function_type_declaration_casing' => true,
    'new_with_braces' => true,
    'no_alternative_syntax' => true,
    'no_blank_lines_after_class_opening' => true,
    'no_blank_lines_after_phpdoc' => true,
    'no_empty_statement' => true,
    'no_extra_blank_lines' => true,
    'no_leading_import_slash' => true,
    'no_leading_namespace_whitespace' => true,
    'no_mixed_echo_print' => ['use' => 'echo'],
    'no_null_property_initialization' => true,
    'no_short_bool_cast' => true,
    'no_singleline_whitespace_before_semicolons' => true,
    'no_spaces_around_offset' => true,
    'no_superfluous_phpdoc_tags' => false,
    'no_superfluous_elseif' => true,
    'no_trailing_comma_in_list_call' => true,
    'no_trailing_comma_in_singleline_array' => true,
    'no_unneeded_control_parentheses' => true,
    'no_unneeded_curly_braces' => true,
    'no_unset_cast' => true,
    'no_unused_imports' => true,
    'no_useless_else' => true,
    'no_useless_return' => true,
    'no_whitespace_before_comma_in_array' => true,
    'no_whitespace_in_blank_line' => true,
    'normalize_index_brace' => true,
    'nullable_type_declaration_for_default_null_value' => true,
    'object_operator_without_whitespace' => true,
    'ordered_class_elements' => [
        'order' => [
            'use_trait', 'constant_public', 'constant_protected',
            'constant_private', 'property_public_static', 'property_protected_static',
            'property_private_static', 'property_public', 'property_protected',
            'property_private', 'method_public_static', 'method_protected_static',
            'method_private_static', 'construct', 'destruct', 'phpunit',
            'method_public', 'method_protected', 'method_private', 'magic',
        ],
        'sort_algorithm' => 'none',
    ],
    'php_unit_fqcn_annotation' => true,
    'php_unit_method_casing' => ['case' => 'camel_case'],
    'phpdoc_add_missing_param_annotation' => ['only_untyped' => false],
    'phpdoc_align' => ['align' => 'left'],
    'phpdoc_line_span' => [
        'const' => 'multi',
        'method' => 'multi',
        'property' => 'multi',
    ],
    'phpdoc_no_access' => true,
    'phpdoc_no_empty_return' => true,
    'phpdoc_no_useless_inheritdoc' => true,
    'phpdoc_order' => true,
    'phpdoc_scalar' => true,
    'phpdoc_single_line_var_spacing' => true,
    'phpdoc_to_comment' => false,
    'phpdoc_trim' => true,
    'phpdoc_trim_consecutive_blank_line_separation' => true,
    'phpdoc_var_without_name' => true,
    'protected_to_private' => true,
    'return_assignment' => false,
    'return_type_declaration' => ['space_before' => 'one'],
    'semicolon_after_instruction' => true,
    'short_scalar_cast' => true,
    'simplified_null_return' => false,
    'single_blank_line_before_namespace' => true,
    'single_quote' => true,
    'single_line_comment_style' => true,
    'ternary_operator_spaces' => true,
    'ternary_to_null_coalescing' => true,
    'trim_array_spaces' => true,
    'unary_operator_spaces' => true,
    'whitespace_after_comma_in_array' => true,
];

// Hand the assembled configuration back to the tool that included this file.
return (new Config())
    ->setRules($rules)
    ->setFinder($finder);
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
- 1.0.0
- No changes

- 1.0.0-beta1
- Implemented Bloom Filter
- Added Boolean Array
- Added `insert` method
- Simplified false positive rate calculation
21 changes: 21 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2022 Andrew DalPino

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
74 changes: 74 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Ok Bloomer
An autoscaling [Bloom filter](https://en.wikipedia.org/wiki/Bloom_filter) with ultra-low memory footprint for PHP. Ok Bloomer employs a novel layered filtering strategy that allows it to expand while maintaining an upper bound on the false positive rate. Each layer consists of a bitmap that remembers the hash signatures of the items inserted so far. If an item gets caught in the filter, then it has probably been seen before. However, if an item passes through the filter, then it has definitely never been seen before.

- **Ultra-low** memory footprint
- **Autoscaling** works on streaming data
- **Bounded** maximum false positive rate
- **Open-source** and free to use commercially

## Installation
Install into your project using [Composer](https://getcomposer.org/):

```sh
$ composer require andrewdalpino/okbloomer
```

### Requirements
- [PHP](https://php.net/manual/en/install.php) 7.4 or above

## Bloom Filter
A probabilistic data structure that estimates the prior occurrence of a given item with a maximum false positive rate.

### Parameters
| # | Name | Default | Type | Description |
|---|---|---|---|---|
| 1 | maxFalsePositiveRate | 0.01 | float | The false positive rate to remain below. |
| 2 | numHashes | 4 | int, null | The number of hash functions used, i.e. the number of slices per layer. Set to `null` for auto. |
| 3 | layerSize | 32000000 | int | The size of each layer of the filter in bits. |

### Example

```php
use OkBloomer\BloomFilter;

$filter = new BloomFilter(0.01, 4, 32000000);

$filter->insert('foo');

var_dump($filter->exists('foo'));

var_dump($filter->existsOrInsert('bar'));

var_dump($filter->exists('bar'));
```

```
bool(true)
bool(false)
bool(true)
```

## Testing
To run the unit tests:

```sh
$ composer test
```

## Static Analysis
To run static code analysis:

```sh
$ composer analyze
```

## Benchmarks
To run the benchmarks:

```sh
$ composer benchmark
```

## References
- [1] P. S. Almeida et al. (2007). Scalable Bloom Filters.
79 changes: 79 additions & 0 deletions benchmarks/BloomFilterBench.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
<?php

namespace OkBloomer\Benchmarks;

use OkBloomer\BloomFilter;

/**
 * Benchmarks for the Bloom filter's `insert` and `existsOrInsert` methods
 * using a set of randomly generated tokens.
 *
 * @BeforeMethods({"setUp"})
 */
class BloomFilterBench
{
    private const NUM_TOKENS = 100000;

    private const TOKEN_LENGTH = 25;

    /**
     * The largest value that a single byte can hold.
     */
    private const MAX_BYTE = 255;

    /**
     * The random tokens fed to the filter by each benchmark subject.
     *
     * @var list<string>
     */
    protected $tokens;

    /**
     * The filter under test.
     *
     * @var \OkBloomer\BloomFilter
     */
    protected $filter;

    /**
     * Generate a token of length k made up of random bytes.
     *
     * @param int $k The number of bytes in the token.
     * @return string A binary string of exactly k bytes, or an empty string when k is less than 1.
     */
    private static function generateToken(int $k) : string
    {
        $token = '';

        for ($i = 0; $i < $k; ++$i) {
            // Sample the full byte range (0 to 255 inclusive) so that the
            // byte 0xFF is not silently excluded from the generated tokens.
            $token .= chr(rand(0, self::MAX_BYTE));
        }

        return $token;
    }

    /**
     * Build the token set and a new filter with default parameters. Named in
     * the class-level BeforeMethods annotation.
     */
    public function setUp() : void
    {
        $tokens = [];

        for ($i = 0; $i < self::NUM_TOKENS; ++$i) {
            $tokens[] = self::generateToken(self::TOKEN_LENGTH);
        }

        $this->tokens = $tokens;

        $this->filter = new BloomFilter();
    }

    /**
     * Insert every token into the filter.
     *
     * @Subject
     * @Iterations(5)
     * @OutputTimeUnit("seconds", precision=3)
     */
    public function insert() : void
    {
        foreach ($this->tokens as $token) {
            $this->filter->insert($token);
        }
    }

    /**
     * Call `existsOrInsert` on the filter once for every token.
     *
     * @Subject
     * @Iterations(5)
     * @OutputTimeUnit("seconds", precision=3)
     */
    public function existsOrInsert() : void
    {
        foreach ($this->tokens as $token) {
            $this->filter->existsOrInsert($token);
        }
    }
}
Loading

0 comments on commit 39321cb

Please sign in to comment.