Skip to content

Commit bf87243

Browse files
committed
id_table: use smaller hash without 'use' field
Use the classic tombstone technique. Validate table overflow on insertion by counting free slots. This is not an exact check, so it could lead to size explosion.
1 parent c225038 commit bf87243

File tree

2 files changed

+242
-2
lines changed

2 files changed

+242
-2
lines changed

id_table.c

Lines changed: 241 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
*/
3333

3434
#ifndef ID_TABLE_IMPL
35-
#define ID_TABLE_IMPL 31
35+
#define ID_TABLE_IMPL 23
3636
#endif
3737

3838
#if ID_TABLE_IMPL == 0
@@ -106,6 +106,13 @@
106106
#define ID_TABLE_USE_SMALL_HASH 1
107107
#define ID_TABLE_USE_ID_SERIAL 1
108108

109+
#elif ID_TABLE_IMPL == 23
110+
#define ID_TABLE_NAME hash
111+
#define ID_TABLE_IMPL_TYPE struct hash_id_table
112+
113+
#define ID_TABLE_USE_SMALLER_HASH 1
114+
#define ID_TABLE_USE_ID_SERIAL 1
115+
109116
#elif ID_TABLE_IMPL == 31
110117
#define ID_TABLE_NAME mix
111118
#define ID_TABLE_IMPL_TYPE struct mix_id_table
@@ -1359,6 +1366,239 @@ hash_id_table_foreach_values(struct hash_id_table *tbl, enum rb_id_table_iterato
13591366
}
13601367
#endif /* ID_TABLE_USE_SMALL_HASH */
13611368

1369+
#if ID_TABLE_USE_SMALLER_HASH
1370+
/* Slot count used by hash_table_extend() when growing a table whose capa is 0. */
#define HASH_MIN_CAPA 4
1371+
1372+
/*
 * Open-addressing ID table. Keys and values live in one allocation:
 * `capa` keys followed by `capa` values (see TABLE_VALUES).
 * A key of 0 marks an empty slot; an all-ones key (~0) marks a deleted slot.
 * Field order matters: hash_table_extend() uses a positional initializer.
 */
struct hash_id_table {
1373+
int capa; /* number of slots; probing masks with capa - 1 */
1374+
int num; /* number of live entries (incremented on add, decremented on delete) */
1375+
id_key_t *keys; /* start of the combined key/value allocation */
1376+
};
1377+
/* Address of the value array, which starts right after the `capa` keys. */
#define TABLE_VALUES(tbl) ((VALUE *)((tbl)->keys + (tbl)->capa))
1378+
static struct hash_id_table *
1379+
hash_id_table_init(struct hash_id_table *tbl, size_t capa)
1380+
{
1381+
if (capa > 0) {
1382+
tbl->capa = (int)capa;
1383+
tbl->keys = (id_key_t *)xmalloc(sizeof(id_key_t) * capa + sizeof(VALUE) * capa);
1384+
}
1385+
return tbl;
1386+
}
1387+
1388+
static struct hash_id_table *
1389+
hash_id_table_create(size_t capa)
1390+
{
1391+
struct hash_id_table *tbl = ZALLOC(struct hash_id_table);
1392+
return hash_id_table_init(tbl, capa);
1393+
}
1394+
1395+
static void
1396+
hash_id_table_free(struct hash_id_table *tbl)
1397+
{
1398+
xfree(tbl->keys);
1399+
xfree(tbl);
1400+
}
1401+
1402+
static void
1403+
hash_id_table_clear(struct hash_id_table *tbl)
1404+
{
1405+
xfree(tbl->keys);
1406+
memset(tbl, 0, sizeof(*tbl));
1407+
}
1408+
1409+
static size_t
1410+
hash_id_table_size(struct hash_id_table *tbl)
1411+
{
1412+
return (size_t)tbl->num;
1413+
}
1414+
1415+
static size_t
1416+
hash_id_table_memsize(struct hash_id_table *tbl)
1417+
{
1418+
return (sizeof(id_key_t) + sizeof(VALUE)) * tbl->capa + sizeof(struct hash_id_table);
1419+
}
1420+
1421+
static void
1422+
hash_table_add(struct hash_id_table *tbl, id_key_t key, VALUE val)
1423+
{
1424+
id_key_t *keys = tbl->keys;
1425+
int mask = tbl->capa - 1;
1426+
int pos = key & mask;
1427+
int d = 1;
1428+
while (keys[pos]) {
1429+
pos = (pos + d) & mask;
1430+
d++;
1431+
}
1432+
keys[pos] = key;
1433+
TABLE_VALUES(tbl)[pos] = val;
1434+
tbl->num++;
1435+
}
1436+
1437+
static void
1438+
hash_table_extend(struct hash_id_table *tbl)
1439+
{
1440+
const int capa = tbl->capa == 0 ? HASH_MIN_CAPA : (tbl->capa * 2);
1441+
struct hash_id_table ttbl = {capa, 0}, tttbl;
1442+
const int size = sizeof(id_key_t) * capa + sizeof(VALUE) * capa;
1443+
int i;
1444+
ttbl.keys = (id_key_t*)xcalloc(1, size);
1445+
for (i=tbl->capa-1; i>=0;i--) {
1446+
if (tbl->keys[i] && ~tbl->keys[i]) {
1447+
hash_table_add(&ttbl, tbl->keys[i], TABLE_VALUES(tbl)[i]);
1448+
}
1449+
}
1450+
tttbl = *tbl;
1451+
*tbl = ttbl;
1452+
xfree(tttbl.keys);
1453+
}
1454+
1455+
static int
1456+
hash_table_index(struct hash_id_table *tbl, id_key_t key)
1457+
{
1458+
id_key_t *keys = tbl->keys;
1459+
int mask = tbl->capa - 1;
1460+
int pos = key & mask;
1461+
int d = 1;
1462+
if (tbl->capa == 0) {
1463+
return -1;
1464+
}
1465+
while (keys[pos] != key) {
1466+
if (!keys[pos]) return -1;
1467+
pos = (pos + d) & mask;
1468+
d++;
1469+
}
1470+
return pos;
1471+
}
1472+
1473+
static int
1474+
hash_id_table_lookup(struct hash_id_table *tbl, ID id, VALUE *valp)
1475+
{
1476+
id_key_t key = id2key(id);
1477+
int index = hash_table_index(tbl, key);
1478+
1479+
if (index >= 0) {
1480+
*valp = TABLE_VALUES(tbl)[index];
1481+
return TRUE;
1482+
}
1483+
else {
1484+
return FALSE;
1485+
}
1486+
}
1487+
1488+
static int
1489+
hash_id_table_insert(struct hash_id_table *tbl, ID id, VALUE val)
1490+
{
1491+
id_key_t key = id2key(id);
1492+
id_key_t *keys = tbl->keys;
1493+
int mask = tbl->capa - 1;
1494+
int free = 0;
1495+
int pos = key & mask;
1496+
int d = 1;
1497+
int max = tbl->capa == 4 ? 4 :
1498+
tbl->capa <= 16 ? tbl->capa / 2 : tbl->capa / 4;
1499+
int freecnt = 0;
1500+
int set = FALSE;
1501+
while (max && freecnt < 2) {
1502+
if (keys[pos] == key) {
1503+
TABLE_VALUES(tbl)[pos] = val;
1504+
set = TRUE;
1505+
}
1506+
if (!free && !(keys[pos] && ~keys[pos])) {
1507+
free = pos+1;
1508+
}
1509+
if (!keys[pos])
1510+
freecnt++;
1511+
pos = (pos + d) & mask;
1512+
d++;
1513+
max--;
1514+
}
1515+
if (!max) {
1516+
hash_table_extend(tbl);
1517+
hash_id_table_insert(tbl, id, val);
1518+
} else if (!set) {
1519+
pos = free - 1;
1520+
keys[pos] = key;
1521+
TABLE_VALUES(tbl)[pos] = val;
1522+
tbl->num++;
1523+
}
1524+
return TRUE;
1525+
}
1526+
1527+
static int
1528+
hash_delete_index(struct hash_id_table *tbl, int index)
1529+
{
1530+
if (index >= 0) {
1531+
tbl->keys[index] = ~0;
1532+
tbl->num--;
1533+
return TRUE;
1534+
} else {
1535+
return FALSE;
1536+
}
1537+
}
1538+
1539+
static int
1540+
hash_id_table_delete(struct hash_id_table *tbl, ID id)
1541+
{
1542+
const id_key_t key = id2key(id);
1543+
int index = hash_table_index(tbl, key);
1544+
return hash_delete_index(tbl, index);
1545+
}
1546+
1547+
static void
1548+
hash_id_table_foreach(struct hash_id_table *tbl, enum rb_id_table_iterator_result (*func)(ID id, VALUE val, void *data), void *data)
1549+
{
1550+
int capa = tbl->capa;
1551+
int i;
1552+
const id_key_t *keys = tbl->keys;
1553+
const VALUE *values = TABLE_VALUES(tbl);
1554+
enum rb_id_table_iterator_result ret;
1555+
1556+
for (i=0; i<capa; i++) {
1557+
const id_key_t key = keys[i];
1558+
if (key && ~key) {
1559+
ret = (*func)(key2id(key), values[i], data);
1560+
assert(key != 0);
1561+
1562+
switch (ret) {
1563+
case ID_TABLE_STOP:
1564+
return;
1565+
case ID_TABLE_DELETE:
1566+
hash_delete_index(tbl, i);
1567+
case ID_TABLE_CONTINUE:
1568+
break;
1569+
}
1570+
}
1571+
}
1572+
}
1573+
1574+
static void
1575+
hash_id_table_foreach_values(struct hash_id_table *tbl, enum rb_id_table_iterator_result (*func)(VALUE val, void *data), void *data)
1576+
{
1577+
int capa = tbl->capa;
1578+
int i;
1579+
const id_key_t *keys = tbl->keys;
1580+
VALUE *values = TABLE_VALUES(tbl);
1581+
enum rb_id_table_iterator_result ret;
1582+
1583+
for (i=0; i<capa; i++) {
1584+
const id_key_t key = keys[i];
1585+
if (key && ~key) {
1586+
ret = (*func)(values[i], data);
1587+
assert(key != 0);
1588+
1589+
switch (ret) {
1590+
case ID_TABLE_STOP:
1591+
return;
1592+
case ID_TABLE_DELETE:
1593+
hash_delete_index(tbl, i);
1594+
case ID_TABLE_CONTINUE:
1595+
break;
1596+
}
1597+
}
1598+
}
1599+
}
1600+
#endif /* ID_TABLE_USE_SMALLER_HASH */
1601+
13621602
#if ID_TABLE_USE_MIX
13631603

13641604
struct mix_id_table {

symbol.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -616,7 +616,7 @@ next_id_base(void)
616616
{
617617
rb_id_serial_t next_serial = global_symbols.last_id + 1;
618618

619-
if (next_serial == 0) {
619+
if (next_serial == ~(rb_id_serial_t)0) {
620620
return (ID)-1;
621621
}
622622
else {

0 commit comments

Comments
 (0)