-
Notifications
You must be signed in to change notification settings - Fork 13
Expand file tree
/
Copy pathtest_detect_encoding.php
More file actions
41 lines (34 loc) · 901 Bytes
/
test_detect_encoding.php
File metadata and controls
41 lines (34 loc) · 901 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
<?php
// Scores a sample file against per-encoding lookup tables ("specters") and
// dumps the relative weight of every candidate encoding.

// Candidate encodings; each one needs a matching specters/<encoding>.php file.
$possible_encodings = array('windows-1251', 'koi8-r', 'iso8859-5');

// Sample to classify. For a quick in-memory experiment, replace the file read
// with iconv('UTF-8', <one of the candidate encodings>, <UTF-8 Russian text>).
$source_path = 'test/cp1251_2.html';
$data = file_get_contents($source_path);
if ($data === false) {
    // file_get_contents() signals failure with false; stop here instead of
    // silently scoring an empty input and reporting meaningless weights.
    throw new RuntimeException("Unable to read sample file: {$source_path}");
}

// $weights accumulates one score per encoding; $specters holds the table
// returned by each specters/<encoding>.php, used below as a lookup keyed by
// two-character chunk.
$weights = array();
$specters = array();
foreach ($possible_encodings as $encoding) {
    $weights[$encoding] = 0;
    $specters[$encoding] = require 'specters/'.$encoding.'.php';
}

// Cut the input into consecutive, non-overlapping two-character chunks. There
// is no "u" modifier, so a "character" here is a single byte; "." does not
// match a newline, so chunks never contain one.
if (preg_match_all("#(?<let>.{2})#", $data, $matches)) {
    foreach ($matches['let'] as $key) {
        foreach ($possible_encodings as $encoding) {
            // Only chunks present in an encoding's table contribute to it.
            if (isset($specters[$encoding][$key])) {
                $weights[$encoding] += $specters[$encoding][$key];
            }
        }
    }
}

// Normalise the scores so they sum to 1. When nothing matched at all the sum
// is zero; report 0.0 for every encoding rather than dividing by zero.
$sum_weight = array_sum($weights);
$has_signal = (float) $sum_weight !== 0.0;
foreach ($weights as $encoding => $weight) {
    $weights[$encoding] = $has_signal ? $weight / $sum_weight : 0.0;
}

var_dump($weights);