-
Notifications
You must be signed in to change notification settings - Fork 1
/
editdist.c
125 lines (106 loc) · 3.01 KB
/
editdist.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
/*
* Copyright (c) 2006 Damien Miller <[email protected]>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <sys/types.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "Python.h"
#if defined(_MSC_VER)
typedef unsigned __int8 u_int8_t;
#endif
#define EDITDIST_VERSION "0.3"
/* $Id$ */
#ifndef MIN
# define MIN(a, b) (((a) < (b)) ? (a) : (b))
#endif
static int
edit_distance(const u_int8_t *a, size_t alen, const u_int8_t *b, size_t blen)
{
size_t tmplen, i, j;
const u_int8_t *tmp;
int *current, *previous, *tmpl, add, del, chg, r;
/* Swap to reduce worst-case memory requirement */
if (alen > blen) {
tmp = a;
a = b;
b = tmp;
tmplen = alen;
alen = blen;
blen = tmplen;
}
if (alen == 0)
return (blen);
if ((previous = calloc(alen + 1, sizeof(*previous))) == NULL)
return (-1);
if ((current = calloc(alen + 1, sizeof(*current))) == NULL) {
free(current);
return (-1);
}
for (i = 0; i < alen + 1; i++)
previous[i] = i;
for (i = 1; i < blen + 1; i++) {
if (i > 1) {
memset(previous, 0, (alen + 1) * sizeof(*previous));
tmpl = previous;
previous = current;
current = tmpl;
}
current[0] = i;
for (j = 1; j < alen + 1; j++) {
add = previous[j] + 1;
del = current[j - 1] + 1;
chg = previous[j - 1];
if (a[j - 1] != b[i - 1])
chg++;
current[j] = MIN(add, del);
current[j] = MIN(current[j], chg);
}
}
r = current[alen];
free(previous);
free(current);
return (r);
}
PyDoc_STRVAR(editdist_distance_doc,
"distance(a, b) -> int\n\
Calculates Levenshtein's edit distance between strings \"a\" and \"b\"\n");
static PyObject *
editdist_distance(PyObject *self, PyObject *args)
{
char *a, *b;
int alen, blen, r;
if (!PyArg_ParseTuple(args, "s#s#", &a, &alen, &b, &blen))
return NULL;
r = edit_distance(a, alen, b, blen);
if (r == -1) {
PyErr_SetString(PyExc_MemoryError, "Out of memory");
return NULL;
}
return PyInt_FromLong(r);
}
static PyMethodDef editdist_methods[] = {
{ "distance", (PyCFunction)editdist_distance,
METH_VARARGS, editdist_distance_doc },
{NULL, NULL, 0, NULL } /* sentinel */
};
PyDoc_STRVAR(module_doc, "Calculate Levenshtein's edit distance.\n");
PyMODINIT_FUNC
initeditdist(void)
{
PyObject *m;
m = Py_InitModule3("editdist", editdist_methods, module_doc);
PyModule_AddStringConstant(m, "__version__", EDITDIST_VERSION);
}