From 759e4e38c3ac090d0fd6b3b82f8e141bb9de38a7 Mon Sep 17 00:00:00 2001 From: Reto Tschuppert Date: Mon, 17 Apr 2023 05:12:37 -0400 Subject: [PATCH 001/197] Adding psql index columns and re-index functionality for users and attendees --- src/onegov/activity/models/attendee.py | 59 ++++++++++++++++++++++-- src/onegov/search/cli.py | 32 ++++++++----- src/onegov/search/upgrade.py | 64 ++++++++++++++++++++++++++ src/onegov/search/utils.py | 20 +++++++- src/onegov/user/models/user.py | 64 +++++++++++++++++++++++--- tests/onegov/search/test_utils.py | 12 +++++ 6 files changed, 228 insertions(+), 23 deletions(-) diff --git a/src/onegov/activity/models/attendee.py b/src/onegov/activity/models/attendee.py index e1736209dd..64a8d64af4 100644 --- a/src/onegov/activity/models/attendee.py +++ b/src/onegov/activity/models/attendee.py @@ -1,4 +1,5 @@ from datetime import date + from onegov.activity.models.booking import Booking from onegov.core.orm import Base from onegov.core.orm.mixins import TimestampMixin @@ -11,7 +12,6 @@ from sqlalchemy import Date from sqlalchemy import Float from sqlalchemy import ForeignKey -from sqlalchemy import Index from sqlalchemy import Integer from sqlalchemy import Numeric from sqlalchemy import Text @@ -175,6 +175,57 @@ def happiness(cls, period_id): backref='attendee' ) - __table_args__ = ( - Index('unique_child_name', 'username', 'name', unique=True), - ) + @staticmethod + def drop_fts_index(session, schema): + """ + Drops the full text search index. Used for re-indexing + + :param session: db session + :param schema: schema on which the fts index shall be dropped + :return: + """ + query = f""" +DROP INDEX IF EXISTS "{schema}".fts_idx_attendees_username_name +""" + print(f'dropping index query: {query}') + session.execute(query) + session.execute("COMMIT") + + @staticmethod + def create_fts_index(session, schema): + """ + Creates the full text search index based on the separate index + column. Used for re-indexing + + :param session: db session + :param schema: schema the index shall be created + :return: + """ + query = f""" +CREATE INDEX fts_idx_attendees_username_name ON +"{schema}".attendees USING GIN (fts_idx_attendees_username_name_col); +""" + print(f'create index query: {query}') + session.execute(query) + session.execute("COMMIT") + + @staticmethod + def add_fts_column(session, schema): + """ + This function is used as migration step moving to postgressql full + text search, OGC-508. It adds a separate column for the tsvector + + :param session: db session + :param schema: schema the full text column shall be added + :return: None + """ + from onegov.search.utils import create_tsvector_string + + s = create_tsvector_string('username', 'name') + query = f""" +ALTER TABLE "{schema}".attendees ADD COLUMN +fts_idx_attendees_username_name_col tsvector GENERATED ALWAYS AS +(to_tsvector('german', {s})) STORED; +""" + session.execute(query) + session.execute("COMMIT") diff --git a/src/onegov/search/cli.py b/src/onegov/search/cli.py index c87cc86147..4e9f60f2d7 100644 --- a/src/onegov/search/cli.py +++ b/src/onegov/search/cli.py @@ -5,6 +5,8 @@ from onegov.core.cli import command_group, pass_group_context from sedate import utcnow +from onegov.core.orm import Base +from onegov.search.utils import searchable_sqlalchemy_models cli = command_group() @@ -13,18 +15,26 @@ @click.option('--fail', is_flag=True, default=False, help='Fail on errors') @pass_group_context def reindex(group_context, fail): - """ Reindexes all objects in the elasticsearch database. 
""" - - def run_reindex(request, app): - if not hasattr(request.app, 'es_client'): - return - - title = f"Reindexing {request.app.application_id}" - print(click.style(title, underline=True)) - + """ Reindexes all objects in the postgresql database. """ + + def run_reindex_psql(request, app): + """ + Looping over all models in project deleting all full text search ( + fts) indexes in postgresql and re-creating them + + :param request: request + :param app: application context + :return: re-indexing function + """ + session = request.session start = utcnow() - request.app.es_perform_reindex(fail) + + for model in searchable_sqlalchemy_models(Base): + print(f'*** model to reindex: {model}') + if model.__tablename__ in ['users', 'attendees']: + model.drop_fts_index(session, app.schema) + model.create_fts_index(session, app.schema) print(f"took {utcnow() - start}") - return run_reindex + return run_reindex_psql diff --git a/src/onegov/search/upgrade.py b/src/onegov/search/upgrade.py index b1901e41b7..0f3b00a0a9 100644 --- a/src/onegov/search/upgrade.py +++ b/src/onegov/search/upgrade.py @@ -2,3 +2,67 @@ upgraded on the server. See :class:`onegov.core.upgrade.upgrade_task`. """ + +from onegov.core.orm import Base +from onegov.core.upgrade import upgrade_task +from onegov.search.utils import searchable_sqlalchemy_models + + +@upgrade_task('Adding full text search index column to postgres 29') +def adding_full_text_search_columns_to_postgres(context): + # need to create all indexes in postgresql on every model in project + # for full text search. This will make elastic search setup obsolete. + # Ticket reference: ogc-508 + # + # NOTE: This task can only be removed once all production systems got + # this upgrade + # + # onegov-core --select /onegov_org/risch upgrade + # onegov-core --select /onegov_org/* upgrade + # onegov-core --select /onegov_town6/meggen upgrade + # onegov-core --select /onegov_town6/* upgrade + + session = context.session + schema = context.schema + + for model in searchable_sqlalchemy_models(Base): + print(f'*** model to migrate: {model}') + if model.__tablename__ in ['users', 'attendees']: + model.add_fts_column(session, schema) + model.create_fts_index(session, schema) + + # def generate_email(): + # import random + # validchars = 'abcdefghijklmnopqrstuvwxyz1234567890' + # loginlen = random.randint(4, 15) + # login = '' + # for i in range(loginlen): + # pos = random.randint(0, len(validchars) - 1) + # login = login + validchars[pos] + # if login[0].isnumeric(): + # pos = random.randint(0, len(validchars) - 10) + # login = validchars[pos] + login + # servers = ['@meggen'] + # servpos = random.randint(0, len(servers) - 1) + # email = login + servers[servpos] + # tlds = ['.ch'] + # tldpos = random.randint(0, len(tlds) - 1) + # email = email + tlds[tldpos] + # return email + # + # def generate_users(count=1000): + # users = list() + # + # for i in range(count): + # users.append( + # User( + # username=generate_email(), + # password_hash='test_password', + # role='member' + # ) + # ) + # return users + + # click.secho('Adding users...') + # for user in generate_users(10000): + # context.session.add(user) diff --git a/src/onegov/search/utils.py b/src/onegov/search/utils.py index a019fbe590..09b458cf78 100644 --- a/src/onegov/search/utils.py +++ b/src/onegov/search/utils.py @@ -8,7 +8,6 @@ from langdetect.utils.lang_profile import LangProfile from onegov.core.orm import find_models - # XXX this is doubly defined in onegov.org.utils, maybe move to a common # regex module in in 
onegov.core HASHTAG = re.compile(r'#\w{3,}') @@ -150,3 +149,22 @@ def detect(self, text): def probabilities(self, text): return self.spawn_detector(text).get_probabilities() + + +def create_tsvector_string(*cols): + """ + Creates tsvector string for columns + Doc reference: + https://www.postgresql.org/docs/current/textsearch-tables.html#TEXTSEARCH-TABLES-INDEX + + :param cols: columns to be indexed + :return: tsvector string for multiple columns + """ + base = "coalesce({}, '')" + ext = " || ' ' || coalesce({}, '')" + + s = base + for _ in range(len(cols) - 1): + s += ext + + return s.format(*cols) diff --git a/src/onegov/user/models/user.py b/src/onegov/user/models/user.py index c0f3455223..d91535af03 100644 --- a/src/onegov/user/models/user.py +++ b/src/onegov/user/models/user.py @@ -1,4 +1,5 @@ from datetime import datetime + from onegov.core.crypto import hash_password, verify_password from onegov.core.orm import Base from onegov.core.orm.mixins import data_property, TimestampMixin @@ -9,8 +10,7 @@ from onegov.core.utils import yubikey_otp_to_serial from onegov.search import ORMSearchable from onegov.user.models.group import UserGroup -from sqlalchemy import Boolean, Column, Index, Text, func, ForeignKey -from sqlalchemy import UniqueConstraint +from sqlalchemy import Boolean, Column, Text, func, ForeignKey from sqlalchemy.ext.hybrid import hybrid_property from sqlalchemy.orm import backref, deferred, relationship from uuid import uuid4 @@ -124,11 +124,6 @@ def userprofile(self): #: the signup token used by the user signup_token = Column(Text, nullable=True, default=None) - __table_args__ = ( - Index('lowercase_username', func.lower(username), unique=True), - UniqueConstraint('source', 'source_id', name='unique_source_id'), - ) - @hybrid_property def title(self): """ Returns the realname or the username of the user, depending on @@ -289,3 +284,58 @@ def logout_all_sessions(self, app): self.cleanup_sessions(app) return count + + @staticmethod + def drop_fts_index(session, schema): + """ + Drops the full text search index. Used for re-indexing + + :param session: db session + :param schema: schema on which the fts index shall be dropped + :return: + """ + query = f""" +DROP INDEX IF EXISTS "{schema}".fts_idx_users_username +""" + print(f'dropping index query: {query}') + session.execute(query) + session.execute("COMMIT") + + @staticmethod + def create_fts_index(session, schema): + """ + Creates the full text search index based on the separate index + column. Used for re-indexing + + :param session: db session + :param schema: schema the index shall be created + :return: + """ + query = f""" +CREATE INDEX fts_idx_users_username ON "{schema}".users USING +GIN (fts_idx_users_username_col); +""" + print(f'create index query: {query}') + session.execute(query) + session.execute("COMMIT") + + @staticmethod + def add_fts_column(session, schema): + """ + This function is used as migration step moving to postgressql full + text search, OGC-508. 
It adds a separate column for the tsvector + + :param session: db session + :param schema: schema the full text column shall be added + :return: None + """ + from onegov.search.utils import create_tsvector_string + + s = create_tsvector_string('username') + query = f""" +ALTER TABLE "{schema}".users ADD COLUMN +fts_idx_users_username_col tsvector GENERATED ALWAYS AS +(to_tsvector('german', {s})) STORED; +""" + session.execute(query) + session.execute("COMMIT") diff --git a/tests/onegov/search/test_utils.py b/tests/onegov/search/test_utils.py index f5c29c3a97..43705d1eb5 100644 --- a/tests/onegov/search/test_utils.py +++ b/tests/onegov/search/test_utils.py @@ -3,6 +3,8 @@ from sqlalchemy import Column, Integer, Text from sqlalchemy.ext.declarative import declarative_base +from onegov.search.utils import create_tsvector_string + def test_get_searchable_sqlalchemy_models(postgres_dsn): Foo = declarative_base() @@ -101,3 +103,13 @@ class News(Page): es_type_name = 'news' assert utils.related_types(Page) == {'news', 'topic'} + + +def test_create_tsvector_string(): + assert create_tsvector_string('username') == \ + "coalesce(username, '')" + assert create_tsvector_string('title', 'body') == \ + "coalesce(title, '') || ' ' || coalesce(body, '')" + assert create_tsvector_string('alpha', 'beta', 'gamma') == \ + "coalesce(alpha, '') || ' ' || coalesce(beta, '') || ' ' || " \ + "coalesce(gamma, '')" From 0c4159adc17f73fd14b4e389b6a54cf08f2f054f Mon Sep 17 00:00:00 2001 From: Reto Tschuppert Date: Tue, 23 May 2023 13:32:45 +0200 Subject: [PATCH 002/197] Disable ES, make user search working using postgresql --- src/onegov/org/models/search.py | 166 ++++++++++++++++++++------------ 1 file changed, 104 insertions(+), 62 deletions(-) diff --git a/src/onegov/org/models/search.py b/src/onegov/org/models/search.py index bec2c8b521..214b7d0f80 100644 --- a/src/onegov/org/models/search.py +++ b/src/onegov/org/models/search.py @@ -1,10 +1,7 @@ from cached_property import cached_property -from elasticsearch_dsl.function import SF -from elasticsearch_dsl.query import FunctionScore -from elasticsearch_dsl.query import Match -from elasticsearch_dsl.query import MatchPhrase -from elasticsearch_dsl.query import MultiMatch + from onegov.core.collection import Pagination +from onegov.user import User class Search(Pagination): @@ -15,86 +12,80 @@ class Search(Pagination): def __init__(self, request, query, page): self.request = request self.query = query - self.page = page - - @cached_property - def available_documents(self): - search = self.request.app.es_search_by_request(self.request) - return search.count() + self.page = page # page index + print('*** tschupre search __init__') @cached_property def explain(self): + # what is it used for? 
+ print('*** tschupre search explain') return self.request.is_manager and 'explain' in self.request.params @property def q(self): + print('*** tschupre search q') return self.query def __eq__(self, other): + print('*** tschupre search __eq__') return self.page == other.page and self.query == other.query - def subset(self): - return self.batch - @property def page_index(self): + print('*** tschupre search page_index') return self.page def page_by_index(self, index): + print('*** tschupre search page_by_index') return Search(self.request, self.query, index) @cached_property def batch(self): + print('*** tschupre search batch') if not self.query: return None - search = self.request.app.es_search_by_request( - request=self.request, - explain=self.explain - ) - - # queries need to be cut at some point to make sure we're not - # pushing the elasticsearch cluster to the brink - query = self.query[:self.max_query_length] - - if query.startswith('#'): - search = self.hashtag_search(search, query) - else: - search = self.generic_search(search, query) - - return search[self.offset:self.offset + self.batch_size].execute() - - def generic_search(self, search, query): - - # make sure the title matches with a higher priority, otherwise the - # "get lucky" functionality is not so lucky after all - match_title = MatchPhrase(title={"query": query, "boost": 3}) - - # we *could* use Match here and include '_all' fields, but that - # yields us less exact results, probably because '_all' includes some - # metadata fields we have no use for - match_rest = MultiMatch(query=query, fields=[ - field for field in self.request.app.es_mappings.registered_fields - if not field.startswith('es_') - ], fuzziness='1', prefix_length=3) - - search = search.query(match_title | match_rest) - - # favour documents with recent changes, over documents without - search.query = FunctionScore(query=search.query, functions=[ - SF('gauss', es_last_change={ - 'offset': '7d', - 'scale': '90d', - 'decay': '0.99' - }) - ]) - - return search - - def hashtag_search(self, search, query): - return search.query(Match(es_tags=query.lstrip('#'))) + return self.postgres_search() + + # def generic_search(self, search, query): + # print('*** tschupre search generic_search') + # + # # "get lucky" functionality is not so lucky after all + # match_title = MatchPhrase(title={"query": query, "boost": 3}) + # + # # we *could* use Match here and include '_all' fields, but that + # # yields us less exact results, probably because '_all' includes some + # # metadata fields we have no use for + # print('*** registered fields:') + # for field in self.request.app.orm_mappings.registered_fields: + # if not field.startswith('es_'): + # # print(f' * field: {field}') + # pass + # match_rest = MultiMatch(query=query, fields=[ + # field for field in self.request.app.orm_mappings. 
+ # registered_fields + # if not field.startswith('es_') + # ], fuzziness='1', prefix_length=3) + # + # search = search.query(match_title | match_rest) + # + # # favour documents with recent changes, over documents without + # search.query = FunctionScore(query=search.query, functions=[ + # SF('gauss', es_last_change={ + # 'offset': '7d', + # 'scale': '90d', + # 'decay': '0.99' + # }) + # ]) + # + # return search + + # def hashtag_search(self, search, query): + # print('*** tschupre search hastag_search') + # return search.query(Match(es_tags=query.lstrip('#'))) def feeling_lucky(self): + print('*** tschupre search feeling_lucky') if self.batch: first_entry = self.batch[0].load() @@ -106,9 +97,60 @@ def feeling_lucky(self): @cached_property def subset_count(self): + print('*** tschupre search subset_count') + return 1 return self.cached_subset and self.cached_subset.hits.total.value or 0 def suggestions(self): - return tuple(self.request.app.es_suggestions_by_request( - self.request, self.query - )) + print(f'*** tschupre search suggestions for \'{self.query}\'') + return tuple() + # self.query is the search term e.g. 'test' + + # session = self.request.session + # tsquery = func.websearch_to_tsquery(self.query) + # q = session.query(User) + # q = q.filter(User.username.match(self.query)) # work but no results + # q = q.filter(User.__tsvector__.match(self.query)) # works but no + # results + # q = q.filter(User.fts_idx_users_username_col.match(self.query)) # + # q = q.filter(User.__tsvector__.like(self.query)) # not working + # q = q.filter(User.fts_idx_users_username_col.like(self.query)) # + # not working + # works but no results + # q = q.filter(User.username == self.query) # works but no results + # q = q.filter(User.username.like(self.query)) # works but no results + # q = q.filter(User.username.match(self.query)) # works but no results + # results = q.all() + # results = q.limit(5) + # + # print('*** query results') + # for result in results: + # print(f'result: {result}') + # + # # return results, + # es_res = tuple(self.request.app.es_suggestions_by_request( + # self.request, self.query + # )) + # + # print(es_res) + # return tuple(self.request.app.es_suggestions_by_request( + # self.request, self.query + # )) + + def postgres_search(self): + results = [] + print('*** tschupre postgresql_search') + + # this works: collecting results not a final 'search' + query = self.request.session.query(User) + query = query.filter(User.username.ilike(f'%{self.query}%')) + results += query.all() + query = self.request.session.query(User) + query = query.filter(User.realname.ilike(f'%{self.query}%')) + results += query.all() + + print(f'*** psql res count: {len(results)}') + for result in results: + print(f'*** psql res: {result.username}') + + return results From 5e89675c645a53cdc165a3b4caee32728b271522 Mon Sep 17 00:00:00 2001 From: Reto Tschuppert Date: Tue, 23 May 2023 13:34:41 +0200 Subject: [PATCH 003/197] Adding fts index functions to create, drop index as well as db upgrade for users --- src/onegov/people/models/person.py | 57 +++++++++++++++++++++++++++++- 1 file changed, 56 insertions(+), 1 deletion(-) diff --git a/src/onegov/people/models/person.py b/src/onegov/people/models/person.py index 81fe88c7b2..57d0559c1a 100644 --- a/src/onegov/people/models/person.py +++ b/src/onegov/people/models/person.py @@ -78,7 +78,7 @@ def spoken_title(self): #: when the person was born born = Column(Text, nullable=True) - #: the professsion of the person + #: the profession of the person profession = 
Column(Text, nullable=True) #: the function of the person @@ -206,3 +206,58 @@ def sortkey(membership): return membership.order_within_person return sorted(self.memberships, key=sortkey) + + @staticmethod + def drop_fts_index(session, schema): + """ + Drops the full text search index. Used for re-indexing + + :param session: db session + :param schema: schema on which the fts index shall be dropped + :return: + """ + query = f""" +DROP INDEX IF EXISTS "{schema}".fts_idx_people_first_last_title +""" + print(f'dropping index query: {query}') + session.execute(query) + session.execute("COMMIT") + + @staticmethod + def create_fts_index(session, schema): + """ + Creates the full text search index based on the separate index + column. Used for re-indexing + + :param session: db session + :param schema: schema the index shall be created + :return: + """ + query = f""" +CREATE INDEX fts_idx_people_first_last_title ON "{schema}".people USING +GIN (fts_idx_people_first_last_title_col); +""" + print(f'create index query: {query}') + session.execute(query) + session.execute("COMMIT") + + @staticmethod + def add_fts_column(session, schema): + """ + This function is used as migration step moving to postgressql full + text search, OGC-508. It adds a separate column for the tsvector + + :param session: db session + :param schema: schema the full text column shall be added + :return: None + """ + from onegov.search.utils import create_tsvector_string + + s = create_tsvector_string('first_name', 'last_name', 'title') + query = f""" +ALTER TABLE "{schema}".people ADD COLUMN +fts_idx_people_first_last_title_col tsvector GENERATED ALWAYS AS +(to_tsvector('german', {s})) STORED; +""" + session.execute(query) + session.execute("COMMIT") From 4ad864e2b23fbf7e132ad0383a01b8474a9fdc6a Mon Sep 17 00:00:00 2001 From: Reto Tschuppert Date: Tue, 23 May 2023 13:40:06 +0200 Subject: [PATCH 004/197] Adding fts index functions to create, drop index as well as db upgrade for tickets --- src/onegov/ticket/model.py | 55 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/src/onegov/ticket/model.py b/src/onegov/ticket/model.py index 4100341e05..bb70fea59e 100644 --- a/src/onegov/ticket/model.py +++ b/src/onegov/ticket/model.py @@ -241,6 +241,61 @@ def create_snapshot(self, request): if data: self.snapshot[f'submitter_{info}'] = data + @staticmethod + def drop_fts_index(session, schema): + """ + Drops the full text search index. Used for re-indexing + + :param session: db session + :param schema: schema on which the fts index shall be dropped + :return: + """ + query = f""" +DROP INDEX IF EXISTS "{schema}".fts_idx_ticket_number +""" + print(f'dropping index query: {query}') + session.execute(query) + session.execute("COMMIT") + + @staticmethod + def create_fts_index(session, schema): + """ + Creates the full text search index based on the separate index + column. Used for re-indexing + + :param session: db session + :param schema: schema the index shall be created + :return: + """ + query = f""" +CREATE INDEX fts_idx_ticket_number ON "{schema}".tickets USING +GIN (fts_idx_ticket_number_col); +""" + print(f'create index query: {query}') + session.execute(query) + session.execute("COMMIT") + + @staticmethod + def add_fts_column(session, schema): + """ + This function is used as migration step moving to postgressql full + text search, OGC-508. 
It adds a separate column for the tsvector
+
+        :param session: db session
+        :param schema: schema the full text column shall be added
+        :return: None
+        """
+        from onegov.search.utils import create_tsvector_string
+
+        s = create_tsvector_string('number')
+        query = f"""
+ALTER TABLE "{schema}".tickets ADD COLUMN
+fts_idx_ticket_number_col tsvector GENERATED ALWAYS AS
+(to_tsvector('german', {s})) STORED;
+"""
+        session.execute(query)
+        session.execute("COMMIT")
+
 
 class TicketPermission(Base, TimestampMixin):
     """ Defines a custom ticket permission.

From 8fcf1e91e2d24b8164ccad462b50b1e0fc4f18db Mon Sep 17 00:00:00 2001
From: Reto Tschuppert
Date: Tue, 23 May 2023 13:42:46 +0200
Subject: [PATCH 005/197] Switch from ES app to psql app

---
 src/onegov/org/app.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/onegov/org/app.py b/src/onegov/org/app.py
index 3e00e4252a..cd1864e26e 100644
--- a/src/onegov/org/app.py
+++ b/src/onegov/org/app.py
@@ -26,7 +26,7 @@
 from onegov.page import Page, PageCollection
 from onegov.pay import PayApp
 from onegov.reservation import LibresIntegration
-from onegov.search import ElasticsearchApp
+from onegov.search import PostgresqlSearchApp
 from onegov.ticket import TicketCollection
 from onegov.ticket import TicketPermission
 from onegov.user import UserApp
@@ -34,7 +34,7 @@
 from purl import URL
 
 
-class OrgApp(Framework, LibresIntegration, ElasticsearchApp, MapboxApp,
+class OrgApp(Framework, LibresIntegration, PostgresqlSearchApp, MapboxApp,
              DepotApp, PayApp, FormApp, UserApp, WebsocketsApp):
 
     serve_static_files = True

From b2087364c416867dfd505064dcacccef4b25abe7 Mon Sep 17 00:00:00 2001
From: Reto Tschuppert
Date: Tue, 23 May 2023 13:43:23 +0200
Subject: [PATCH 006/197] Switch from ES app to psql app

---
 src/onegov/search/__init__.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/onegov/search/__init__.py b/src/onegov/search/__init__.py
index 679f39b5de..d34b83b9cf 100644
--- a/src/onegov/search/__init__.py
+++ b/src/onegov/search/__init__.py
@@ -4,11 +4,12 @@
 from onegov.search.mixins import Searchable, ORMSearchable, SearchableContent
 from onegov.search.dsl import Search
-from onegov.search.integration import ElasticsearchApp
+from onegov.search.integration import PostgresqlSearchApp, ElasticsearchApp
 from onegov.search.errors import SearchOfflineError
 
 __all__ = [
     'ElasticsearchApp',
+    'PostgresqlSearchApp',
     'ORMSearchable',
     'Search',
     'Searchable',

From 41f31116a5e601d09dbd6173f7d491fe95cdd33f Mon Sep 17 00:00:00 2001
From: Reto Tschuppert
Date: Tue, 23 May 2023 13:47:35 +0200
Subject: [PATCH 007/197] Initial move from es to psql search for town6

---
 src/onegov/search/integration.py     | 134 +++++++++++++++++++++++++++
 src/onegov/town6/templates/search.pt |   9 +-
 2 files changed, 140 insertions(+), 3 deletions(-)

diff --git a/src/onegov/search/integration.py b/src/onegov/search/integration.py
index ba1eef9fad..0dae0e58a9 100644
--- a/src/onegov/search/integration.py
+++ b/src/onegov/search/integration.py
@@ -10,6 +10,8 @@
 from elasticsearch import TransportError
 from elasticsearch.connection import create_ssl_context
 from more.transaction.main import transaction_tween_factory
+
+from onegov.core.orm import Base
 from onegov.search import Search, log
 from onegov.search.errors import SearchOfflineError
 from onegov.search.indexer import Indexer
@@ -90,6 +92,136 @@ def is_5xx_error(error):
     return error.status_code and str(error.status_code).startswith('5')
 
 
+
+class PostgresqlSearchApp(morepath.App):
+    """
+    Allows to register
events: insert, update, remove row + Allows to index a table + Allows to search for a term + """ + + def configure_search(self, **cfg): + self.orm_mappings = TypeMappingRegistry() + + for base in self.session_manager.bases: + self.orm_mappings.register_orm_base(base) + + self.session_manager.on_insert.connect(self._on_insert) + self.session_manager.on_update.connect(self._on_update) + self.session_manager.on_delete.connect(self._on_delete) + + def psql_search_by_request(self, request, types='*', explain=False, + limit_to_request_language=False): + """ Takes the current :class:`~onegov.core.request.CoreRequest` and + returns an elastic search scoped to the current application, the + requests language and it's access rights. + + """ + + # if limit_to_request_language: + # languages = [request.locale.split('_')[0]] + # else: + # languages = '*' + + # return self.psql_search( + # languages=languages, + # types=types, + # include_private=self.may_use_private_search(request), + # explain=explain + # ) + + return None + + def may_use_private_search(self, request): + """ Returns True if the given request is allowed to access private + search results. By default every logged in user has access to those. + + This method may be overwritten if this is not desired. + + """ + return request.is_logged_in + + def _on_insert(self, schema, obj): + print(f'*** tschupre _on_insert {schema} {obj}') + # if not self.stopped: + # if isinstance(obj, Searchable): + # self.session_manager.session.index(schema, obj) + # self.index(schema, obj) + + def _on_update(self, schema, obj): + print(f'*** tschupre _on_update {schema} {obj}') + # if not self.stopped: + # if isinstance(obj, Searchable): + # self.delete(schema, obj) + # self.index(schema, obj) + + def _on_delete(self, schema, obj): + print(f'*** tschupre _on_delete {schema} {obj}') + # if not self.stopped: + # if isinstance(obj, Searchable): + # self.update(schema, obj) + + def psql_perform_reindex(self, session): + for model in searchable_sqlalchemy_models(Base): + print(f'*** model to reindex: {model}') + + # TODO: move to ticket.py + @staticmethod + def drop_fts_index(session, schema): + """ + Drops the full text search index. Used for re-indexing + + :param session: db session + :param schema: schema on which the fts index shall be dropped + :return: + """ + query = f""" +DROP INDEX IF EXISTS "{schema}".fts_idx_tickets_number +""" + print(f'dropping index query: {query}') + session.execute(query) + session.execute("COMMIT") + + @staticmethod + def create_fts_index(session, schema): + """ + Creates the full text search index based on the separate index + column. Used for re-indexing + + :param session: db session + :param schema: schema the index shall be created + :return: + """ + query = f""" +CREATE INDEX fts_idx_tickets_number ON "{schema}".tickets USING +GIN (fts_idx_tickets_number_col); +""" + print(f'create index query: {query}') + session.execute(query) + session.execute("COMMIT") + + @staticmethod + def add_fts_column(session, schema): + """ + This function is used as migration step moving to postgressql full + text search, OGC-508. 
It adds a separate column for the tsvector
+
+        :param session: db session
+        :param schema: schema the full text column shall be added
+        :return: None
+        """
+        from onegov.search.utils import create_tsvector_string
+
+        s = create_tsvector_string('number')
+        query = f"""
+ALTER TABLE "{schema}".tickets ADD COLUMN
+fts_idx_tickets_number_col tsvector GENERATED ALWAYS AS
+(to_tsvector('german', {s})) STORED;
+"""
+        session.execute(query)
+        session.execute("COMMIT")
+
+
+# TODO: REMOVE
 class ElasticsearchApp(morepath.App):
     """ Provides elasticsearch integration for
     :class:`onegov.core.framework.Framework` based applications.
@@ -226,6 +358,7 @@ def es_search(self, languages='*', types='*', include_private=False,
 
         """
 
+        print(f'*** tschupre es_search mappings: {self.es_mappings}')
         search = Search(
             session=self.session(),
             mappings=self.es_mappings,
@@ -326,6 +459,7 @@ def es_suggestions_by_request(self, request, query, types='*',
         else:
             languages = '*'
 
+        print(f'es_suggestions_by_request language: {languages}')
         return self.es_suggestions(
             query,
             languages=languages,
diff --git a/src/onegov/town6/templates/search.pt b/src/onegov/town6/templates/search.pt
index 1c25d342de..a10435f4ea 100644
--- a/src/onegov/town6/templates/search.pt
+++ b/src/onegov/town6/templates/search.pt
@@ -10,6 +10,7 @@
 
 
 
+
 
 
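A note on querying the columns added above: the generated tsvector columns
(for example fts_idx_users_username_col from PATCH 001) only pay off when
queries go through them; the ILIKE fallback in postgres_search() from
PATCH 002 cannot use the GIN indexes at all. The sketch below shows one way
the indexed column could be queried with SQLAlchemy. It is an illustration,
not part of the patch series: the reduced User model and the helper name
search_users are assumptions, and only the column and index names defined
in PATCH 001 are taken from the patches themselves.

from sqlalchemy import Column, Text, func
from sqlalchemy.dialects.postgresql import TSVECTOR
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import Session

Base = declarative_base()


class User(Base):
    # Reduced stand-in for onegov.user.models.user.User; only the columns
    # needed by this example are declared.
    __tablename__ = 'users'

    id = Column(Text, primary_key=True)
    username = Column(Text, nullable=False)

    # Maps the column created by add_fts_column(); it is GENERATED ALWAYS,
    # so the application only ever reads it.
    fts_idx_users_username_col = Column(TSVECTOR)


def search_users(session: Session, term: str, limit: int = 10):
    """ Returns users whose username matches the search term. """

    # websearch_to_tsquery parses free-form input safely; the text search
    # configuration ('german') must match the one used in the generated
    # column, otherwise the GIN index cannot be used.
    tsquery = func.websearch_to_tsquery('german', term)

    return (
        session.query(User)
        # '@@' is the tsvector-matches-tsquery operator; this predicate is
        # what the GIN index from create_fts_index() accelerates.
        .filter(User.fts_idx_users_username_col.op('@@')(tsquery))
        # rank better matches first
        .order_by(
            func.ts_rank(User.fts_idx_users_username_col, tsquery).desc())
        .limit(limit)
        .all()
    )

Unlike the f-string DDL in the migration helpers, the search term here is
passed as a bound parameter, so user input never ends up inside a raw SQL
string.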