diff --git a/django_elasticsearch/managers.py b/django_elasticsearch/managers.py index edf2718..9c77982 100644 --- a/django_elasticsearch/managers.py +++ b/django_elasticsearch/managers.py @@ -255,26 +255,28 @@ def make_mapping(self): for field_name in self.get_fields(): try: - field = self.model._meta.get_field(field_name) - except FieldDoesNotExist: - # abstract field - mapping = {} - else: - mapping = {'type': ELASTICSEARCH_FIELD_MAP.get( - field.get_internal_type(), 'string')} + mapping = self.model.Elasticsearch.mappings[field_name] + except (AttributeError, KeyError): + mapping = None + if mapping: + mappings[field_name] = mapping + try: # if an analyzer is set as default, use it. # TODO: could be also tokenizer, filter, char_filter - if mapping['type'] == 'string': - analyzer = settings.ELASTICSEARCH_SETTINGS['analysis']['default'] - mapping['analyzer'] = analyzer + try: + field = self.model._meta.get_field(field_name) + except FieldDoesNotExist: + # abstract field + pass + else: + mtype = ELASTICSEARCH_FIELD_MAP.get(field.get_internal_type(), 'string') + if mtype == 'string': + analyzer = settings.ELASTICSEARCH_SETTINGS['analysis']['default'] + mapping['type'] = 'string' + mapping['analyzer'] = analyzer except (ValueError, AttributeError, KeyError, TypeError): pass - try: - mapping.update(self.model.Elasticsearch.mappings[field_name]) - except (AttributeError, KeyError, TypeError): - pass - mappings[field_name] = mapping # add a completion mapping for every auto completable field fields = self.model.Elasticsearch.completion_fields or [] diff --git a/django_elasticsearch/query.py b/django_elasticsearch/query.py index e4644bb..c2804c7 100644 --- a/django_elasticsearch/query.py +++ b/django_elasticsearch/query.py @@ -121,7 +121,7 @@ def __len__(self): def make_search_body(self): body = {} - search = {} + query = {} if self.fuzziness is None: # beware, could be 0 fuzziness = getattr(settings, 'ELASTICSEARCH_FUZZINESS', 0.5) @@ -129,7 +129,7 @@ def make_search_body(self): fuzziness = self.fuzziness if self._query: - search['query'] = { + query = { 'match': { '_all': { 'query': self._query, @@ -140,15 +140,10 @@ def make_search_body(self): if self.filters: # TODO: should we add _cache = true ?! - search['filter'] = {} + filters = {} mapping = self.model.es.get_mapping() for field, value in self.filters.items(): - try: - value = value.lower() - except AttributeError: - pass - field, operator = self.sanitize_lookup(field) try: @@ -161,38 +156,46 @@ def make_search_body(self): if is_nested and isinstance(value, Model): value = value.id - if operator == 'exact': - filtr = {'bool': {'must': [{'term': {field_name: value}}]}} + if operator == 'contains': + nested_update(filters, + {'query': {'match': {field_name: {'query': value}}}}) + if len(filters['query']['match'].items()) > 1: + raise NotImplementedError("multi_match is not implemented.") + elif operator == 'isnull': + if value: + filtr = {'missing': {'field': field_name}} + else: + filtr = {'exists': {'field': field_name}} - elif operator == 'not': - filtr = {'bool': {'must_not': [{'term': {field_name: value}}]}} + nested_update(filters, {'filter': filtr}) - elif operator == 'should': - filtr = {'bool': {operator: [{'term': {field_name: value}}]}} + else: + if operator == 'exact': + filtr = {'must': [{'term': {field_name: value}}]} - elif operator == 'contains': - filtr = {'query': {'match': {field_name: {'query': value}}}} + elif operator == 'not': + filtr = {'must_not': [{'term': {field_name: value}}]} - elif operator in ['gt', 'gte', 'lt', 'lte']: - filtr = {'bool': {'must': [{'range': {field_name: { - operator: value}}}]}} + elif operator == 'should': + filtr = {operator: [{'term': {field_name: value}}]} - elif operator == 'range': - filtr = {'bool': {'must': [{'range': {field_name: { - 'gte': value[0], - 'lte': value[1]}}}]}} + elif operator in ['gt', 'gte', 'lt', 'lte']: + filtr = {'must': [{'range': {field_name: { + operator: value}}}]} - elif operator == 'isnull': - if value: - filtr = {'missing': {'field': field_name}} - else: - filtr = {'exists': {'field': field_name}} + elif operator == 'range': + filtr = {'must': [{'range': {field_name: { + 'gte': value[0], + 'lte': value[1]}}}]} - nested_update(search['filter'], filtr) + nested_update(filters, {'filter': {'bool': filtr}}) - body['query'] = {'filtered': search} + body = {'query': {'filtered': filters}} + if query: + body['query']['filtered']['query'] = query else: - body = search + if query: + body = {'query': query} return body diff --git a/django_elasticsearch/tests/test_indexable.py b/django_elasticsearch/tests/test_indexable.py index 1d0d9b2..d350b50 100644 --- a/django_elasticsearch/tests/test_indexable.py +++ b/django_elasticsearch/tests/test_indexable.py @@ -95,8 +95,8 @@ def test_fuzziness(self): "default": "test_analyzer", "analyzer": { "test_analyzer": { - "type": "custom", - "tokenizer": "standard", + "type": "custom", + "tokenizer": "standard", } } } @@ -131,14 +131,14 @@ def test_auto_completion(self): @withattrs(TestModel.Elasticsearch, 'fields', ['username', 'date_joined']) def test_get_mapping(self): + # Reset the eventual cache on the Model mapping TestModel.es._mapping = None TestModel.es.flush() TestModel.es.do_update() - expected = {u'date_joined': {u'format': u'dateOptionalTime', u'type': u'date'}, - u'username': {u'index': u'not_analyzed', u'type': u'string'}} + expected = {u'username': {u'index': u'not_analyzed', u'type': u'string'}, + u'date_joined': {u'format': u'dateOptionalTime', u'type': u'date'}} - # Reset the eventual cache on the Model mapping mapping = TestModel.es.get_mapping() TestModel.es._mapping = None self.assertEqual(expected, mapping) @@ -162,7 +162,7 @@ def test_reevaluate(self): q = TestModel.es.search('woot') self.assertTrue(self.instance in q.deserialize()) # evaluate - q = q.filter(last_name='grut') + q = q.filter(username='grut') self.assertFalse(self.instance in q.deserialize()) # evaluate def test_diff(self): @@ -229,7 +229,7 @@ def test_auto_save(self): self.instance.first_name = u'Test' self.instance.save() TestModel.es.do_update() - self.assertEqual(TestModel.es.filter(first_name=u'Test').count(), 1) + self.assertEqual(TestModel.es.search(u'Test').count(), 1) def test_auto_delete(self): self.instance.es.delete() diff --git a/django_elasticsearch/tests/test_qs.py b/django_elasticsearch/tests/test_qs.py index 60f8c12..8905e9f 100644 --- a/django_elasticsearch/tests/test_qs.py +++ b/django_elasticsearch/tests/test_qs.py @@ -122,7 +122,8 @@ def test_suggestions(self): def test_count(self): self.assertEqual(TestModel.es.count(), 4) self.assertEqual(TestModel.es.search("John").count(), 1) - self.assertEqual(TestModel.es.search("").filter(last_name=u"Smith").count(), 3) + self.assertEqual(TestModel.es.search("").filter(username=u"Woot").count(), 0) + self.assertEqual(TestModel.es.search("").filter(username=u"woot").count(), 1) def test_count_after_reeval(self): # regression test @@ -163,14 +164,14 @@ def test_get(self): TestModel.es.queryset.get() def test_filtering(self): - contents = TestModel.es.filter(last_name=u"Smith").deserialize() + contents = TestModel.es.filter(last_name__contains=u"Smith").deserialize() self.assertTrue(self.t1 in contents) self.assertTrue(self.t2 in contents) self.assertTrue(self.t3 in contents) self.assertTrue(self.t4 not in contents) def test_multiple_filter(self): - contents = TestModel.es.filter(last_name=u"Smith", first_name=u"jack").deserialize() + contents = TestModel.es.filter(id__gt=self.t1.id, id__lt=self.t3.id).deserialize() self.assertTrue(self.t1 not in contents) self.assertTrue(self.t2 in contents) self.assertTrue(self.t3 not in contents) @@ -293,29 +294,32 @@ def test_excluding_lookups(self): self.assertTrue(self.t4 in contents) def test_chain_filter_exclude(self): - contents = TestModel.es.filter(last_name=u"Smith").exclude(username=u"woot").deserialize() + contents = TestModel.es.filter(last_name__contains=u"Smith").exclude(username=u"woot").deserialize() self.assertTrue(self.t1 in contents) # note: it works because username is "not analyzed" self.assertTrue(self.t2 not in contents) # excluded self.assertTrue(self.t3 in contents) self.assertTrue(self.t4 not in contents) # not a Smith - @withattrs(TestModel.Elasticsearch, 'fields', ['id', 'username']) - @withattrs(TestModel.Elasticsearch, 'mappings', {}) + def test_chain_search_filter(self): + contents = TestModel.es.search("Smith").filter(id__gt=self.t2.id).deserialize() + self.assertTrue(self.t1 not in contents) + self.assertTrue(self.t2 not in contents) + self.assertTrue(self.t3 in contents) + self.assertTrue(self.t4 not in contents) + def test_contains(self): - TestModel.es._fields = None - TestModel.es._mapping = None - TestModel.es.flush() # update the mapping, username is now analyzed - time.sleep(2) # TODO: flushing is not immediate, find a better way - contents = TestModel.es.filter(username__contains='woot').deserialize() + contents = TestModel.es.filter(last_name__contains='Smith').deserialize() self.assertTrue(self.t1 in contents) self.assertTrue(self.t2 in contents) - self.assertTrue(self.t3 not in contents) + self.assertTrue(self.t3 in contents) self.assertTrue(self.t4 not in contents) def test_should_lookup(self): - contents = TestModel.es.all().filter(last_name__should=u"Smith").deserialize() + contents = TestModel.es.all().filter(first_name__should=u"john", + last_name__should=u"bar").deserialize() self.assertTrue(self.t1 in contents) - self.assertTrue(self.t4 not in contents) + self.assertTrue(self.t4 in contents) + self.assertEqual(len(contents), 2) def test_nonzero(self): self.assertTrue(TestModel.es.all()) @@ -372,5 +376,5 @@ def test_prefetch_related(self): TestModel.es.all().prefetch_related() def test_range_plus_must(self): - q = TestModel.es.filter(date_joined__gt='now-10d').filter(first_name="John") + q = TestModel.es.filter(date_joined__gt='now-10d').filter(first_name__contains="John") self.assertEqual(q.count(), 1) diff --git a/readme.md b/readme.md index 55b19ba..ae9fd5d 100644 --- a/readme.md +++ b/readme.md @@ -228,8 +228,7 @@ EsQueryset API: --------------- This class is as close as possible to a standard relational db Queryset, however the db operations (update and delete) are deactivated (i'm open for discussion on if and how to implement these). Note that just like regular Querysets, EsQuerysets are lazy, they can be ordered, filtered and faceted. -Note that the return value of the queryset is higly dependent on your mapping, for example, if you want to be able to do an exact filtering with filter() you need a field with {"index" : "not_analyzed"}. -Also by default, filters are case insensitive, if you have a case sensitive tokenizer, you need to instantiate EsQueryset with ignore_case=False. +Note that the return value of the queryset is higly dependent on your mapping, for example, if you want to be able to do an exact filtering on a string with filter() you need a field with {"index" : "not_analyzed"}. An EsQueryset acts a lot like a regular Queryset: ``` diff --git a/test_project/test_app/models.py b/test_project/test_app/models.py index 933fbdb..0b53a2c 100644 --- a/test_project/test_app/models.py +++ b/test_project/test_app/models.py @@ -26,7 +26,9 @@ class Elasticsearch(EsIndexable.Elasticsearch): index = 'django-test' doc_type = 'test-doc-type' mappings = { - "username": {"index": "not_analyzed"}, + "username": { + "type": "string", + "index": "not_analyzed"}, "date_joined_exp": {"type": "object"} } serializer_class = TestSerializer diff --git a/test_project/test_project/settings.py b/test_project/test_project/settings.py index 7227e30..54511c6 100644 --- a/test_project/test_project/settings.py +++ b/test_project/test_project/settings.py @@ -137,7 +137,6 @@ def __getitem__(self, item): INSTALLED_APPS = ( 'django.contrib.auth', 'django.contrib.contenttypes', - 'django_extensions', 'django_elasticsearch', 'test_app' )