From 6c4c89ac4814b99db653ad41a48f71b59d9d4d39 Mon Sep 17 00:00:00 2001 From: Xavier Bustamante Talavera Date: Thu, 7 Feb 2019 13:47:42 +0100 Subject: [PATCH] Use Postgres 11; enhance search query with websearch; really use indexes, add hash index; fix partially patching lots --- README.md | 7 ++--- ereuse_devicehub/resources/agent/models.py | 5 ++-- ereuse_devicehub/resources/device/models.py | 27 +++++++++++++------ ereuse_devicehub/resources/device/search.py | 24 +++++++++-------- ereuse_devicehub/resources/event/models.py | 16 ++++++++--- ereuse_devicehub/resources/inventory/model.py | 6 ++++- ereuse_devicehub/resources/lot/models.py | 5 ++-- ereuse_devicehub/resources/lot/views.py | 3 ++- ereuse_devicehub/resources/search.py | 4 +-- ereuse_devicehub/resources/tag/model.py | 7 +++-- examples/create-db.sh | 1 + tests/test_lot.py | 1 + 12 files changed, 69 insertions(+), 37 deletions(-) diff --git a/README.md b/README.md index f4c3dbad..db54c0af 100644 --- a/README.md +++ b/README.md @@ -23,11 +23,8 @@ Devicehub is built with [Teal](https://github.com/bustawin/teal) and The requirements are: - Python 3.5.3 or higher. In debian 9 is `# apt install python3-pip`. -- PostgreSQL 9.6 or higher with pgcrypto and ltree. - In debian 9 is `# apt install postgresql-contrib` -- passlib. In debian 9 is `# apt install python3-passlib`. -- Weasyprint requires some system packages. - [Their docs explain which ones and how to install them](http://weasyprint.readthedocs.io/en/stable/install.html). +- [PostgreSQL 11 or higher](https://www.postgresql.org/download/). +- Weasyprint [dependencies](http://weasyprint.readthedocs.io/en/stable/install.html). Install Devicehub with *pip*: `pip3 install ereuse-devicehub -U --pre`. diff --git a/ereuse_devicehub/resources/agent/models.py b/ereuse_devicehub/resources/agent/models.py index 19a76ee6..930c39e9 100644 --- a/ereuse_devicehub/resources/agent/models.py +++ b/ereuse_devicehub/resources/agent/models.py @@ -27,7 +27,7 @@ class JoinedTableMixin: class Agent(Thing): id = Column(UUID(as_uuid=True), primary_key=True, default=uuid4) - type = Column(Unicode, nullable=False, index=True) + type = Column(Unicode, nullable=False) name = Column(CIText()) name.comment = """ The name of the organization or person. @@ -46,7 +46,8 @@ class Agent(Thing): __table_args__ = ( UniqueConstraint(tax_id, country, name='Registration Number per country.'), - UniqueConstraint(tax_id, name, name='One tax ID with one name.') + UniqueConstraint(tax_id, name, name='One tax ID with one name.'), + db.Index('agent_type', type, postgresql_using='hash') ) @declared_attr diff --git a/ereuse_devicehub/resources/device/models.py b/ereuse_devicehub/resources/device/models.py index 5ef33723..4bbbdc59 100644 --- a/ereuse_devicehub/resources/device/models.py +++ b/ereuse_devicehub/resources/device/models.py @@ -7,7 +7,7 @@ from typing import Dict, List, Set from boltons import urlutils from citext import CIText -from ereuse_utils.naming import Naming, HID_CONVERSION_DOC +from ereuse_utils.naming import HID_CONVERSION_DOC, Naming from more_itertools import unique_everseen from sqlalchemy import BigInteger, Boolean, Column, Enum as DBEnum, Float, ForeignKey, Integer, \ Sequence, SmallInteger, Unicode, inspect, text @@ -49,7 +49,7 @@ class Device(Thing): The identifier of the device for this database. Used only internally for software; users should not use this. """ - type = Column(Unicode(STR_SM_SIZE), nullable=False, index=True) + type = Column(Unicode(STR_SM_SIZE), nullable=False) hid = Column(Unicode(), check_lower('hid'), unique=True) hid.comment = """ The Hardware ID (HID) is the unique ID traceability systems @@ -110,6 +110,11 @@ class Device(Thing): 'weight' } + __table_args__ = ( + db.Index('device_id', id, postgresql_using='hash'), + db.Index('type_index', type, postgresql_using='hash') + ) + def __init__(self, **kw) -> None: super().__init__(**kw) with suppress(TypeError): @@ -476,7 +481,7 @@ class Component(Device): """A device that can be inside another device.""" id = Column(BigInteger, ForeignKey(Device.id), primary_key=True) - parent_id = Column(BigInteger, ForeignKey(Computer.id), index=True) + parent_id = Column(BigInteger, ForeignKey(Computer.id)) parent = relationship(Computer, backref=backref('components', lazy=True, @@ -485,6 +490,10 @@ class Component(Device): collection_class=OrderedSet), primaryjoin=parent_id == Computer.id) + __table_args__ = ( + db.Index('parent_index', parent_id, postgresql_using='hash'), + ) + def similar_one(self, parent: Computer, blacklist: Set[int]) -> 'Component': """ Gets a component that: @@ -720,19 +729,21 @@ class Manufacturer(db.Model): Ideally users should use the names from this list when submitting devices. """ - __table_args__ = {'schema': 'common'} CSV_DELIMITER = csv.get_dialect('excel').delimiter - name = db.Column(CIText(), - primary_key=True, - # from https://niallburkley.com/blog/index-columns-for-like-in-postgres/ - index=db.Index('name', text('name gin_trgm_ops'), postgresql_using='gin')) + name = db.Column(CIText(), primary_key=True) name.comment = """The normalized name of the manufacturer.""" url = db.Column(URL(), unique=True) url.comment = """An URL to a page describing the manufacturer.""" logo = db.Column(URL()) logo.comment = """An URL pointing to the logo of the manufacturer.""" + __table_args__ = ( + # from https://niallburkley.com/blog/index-columns-for-like-in-postgres/ + db.Index('name_index', text('name gin_trgm_ops'), postgresql_using='gin'), + {'schema': 'common'} + ) + @classmethod def add_all_to_session(cls, session: db.Session): """Adds all manufacturers to session.""" diff --git a/ereuse_devicehub/resources/device/search.py b/ereuse_devicehub/resources/device/search.py index ac76c41d..65c55fc4 100644 --- a/ereuse_devicehub/resources/device/search.py +++ b/ereuse_devicehub/resources/device/search.py @@ -24,18 +24,20 @@ class DeviceSearch(db.Model): primary_key=True) device = db.relationship(Device, primaryjoin=Device.id == device_id) - properties = db.Column(TSVECTOR, - nullable=False, - index=db.Index('properties gist', - postgresql_using='gist', - postgresql_concurrently=True)) - tags = db.Column(TSVECTOR, index=db.Index('tags gist', - postgresql_using='gist', - postgresql_concurrently=True)) + properties = db.Column(TSVECTOR, nullable=False) + tags = db.Column(TSVECTOR) - __table_args__ = { - 'prefixes': ['UNLOGGED'] # Only for temporal tables, can cause table to empty on turn on - } + __table_args__ = ( + # todo to add concurrency this should be commited separately + # see https://docs.sqlalchemy.org/en/latest/dialects/postgresql.html#indexes-with-concurrently + db.Index('properties gist', properties, postgresql_using='gist'), + db.Index('tags gist', tags, postgresql_using='gist'), + { + 'prefixes': ['UNLOGGED'] + # Only for temporal tables, can cause table to empty on turn on + } + + ) @classmethod def update_modified_devices(cls, session: db.Session): diff --git a/ereuse_devicehub/resources/event/models.py b/ereuse_devicehub/resources/event/models.py index 9efc9e0f..cad8f8ee 100644 --- a/ereuse_devicehub/resources/event/models.py +++ b/ereuse_devicehub/resources/event/models.py @@ -49,7 +49,7 @@ class Event(Thing): This class extends `Schema's Action `_. """ id = Column(UUID(as_uuid=True), primary_key=True, default=uuid4) - type = Column(Unicode, nullable=False, index=True) + type = Column(Unicode, nullable=False) name = Column(CIText(), default='', nullable=False) name.comment = """ A name or title for the event. Used when searching for events. @@ -146,7 +146,7 @@ class Event(Thing): For Add and Remove though, this has another meaning: the components that are added or removed. """ - parent_id = Column(BigInteger, ForeignKey(Computer.id), index=True) + parent_id = Column(BigInteger, ForeignKey(Computer.id)) parent = relationship(Computer, backref=backref('events_parent', lazy=True, @@ -161,6 +161,12 @@ class Event(Thing): would point to the computer that contained this data storage, if any. """ + __table_args__ = ( + db.Index('ix_id', id, postgresql_using='hash'), + db.Index('ix_type', type, postgresql_using='hash'), + db.Index('ix_parent_id', parent_id, postgresql_using='hash') + ) + @property def elapsed(self): """Returns the elapsed time with seconds precision.""" @@ -230,7 +236,7 @@ class JoinedWithOneDeviceMixin: class EventWithOneDevice(JoinedTableMixin, Event): - device_id = Column(BigInteger, ForeignKey(Device.id), nullable=False, index=True) + device_id = Column(BigInteger, ForeignKey(Device.id), nullable=False) device = relationship(Device, backref=backref('events_one', lazy=True, @@ -239,6 +245,10 @@ class EventWithOneDevice(JoinedTableMixin, Event): collection_class=OrderedSet), primaryjoin=Device.id == device_id) + __table_args__ = ( + db.Index('event_one_device_id_index', device_id, postgresql_using='hash'), + ) + def __repr__(self) -> str: return '<{0.t} {0.id} {0.severity} device={0.device!r}>'.format(self) diff --git a/ereuse_devicehub/resources/inventory/model.py b/ereuse_devicehub/resources/inventory/model.py index 70a75aae..98a86c97 100644 --- a/ereuse_devicehub/resources/inventory/model.py +++ b/ereuse_devicehub/resources/inventory/model.py @@ -6,7 +6,6 @@ from ereuse_devicehub.resources.models import Thing class Inventory(Thing): - __table_args__ = {'schema': 'common'} id = db.Column(db.Unicode(), primary_key=True) id.comment = """The name of the inventory as in the URL and schema.""" name = db.Column(db.CIText(), nullable=False, unique=True) @@ -16,6 +15,11 @@ class Inventory(Thing): tag_token.comment = """The token to access a Tag service.""" org_id = db.Column(db.UUID(as_uuid=True), db.ForeignKey('organization.id'), nullable=False) + __table_args__ = ( + db.Index('id_hash', id, postgresql_using='hash'), + {'schema': 'common'} + ) + @classproperty def current(cls) -> 'Inventory': """The inventory of the current_app.""" diff --git a/ereuse_devicehub/resources/lot/models.py b/ereuse_devicehub/resources/lot/models.py index ce0f986a..4cc08882 100644 --- a/ereuse_devicehub/resources/lot/models.py +++ b/ereuse_devicehub/resources/lot/models.py @@ -182,7 +182,7 @@ class Path(db.Model): id = db.Column(db.UUID(as_uuid=True), primary_key=True, server_default=db.text('gen_random_uuid()')) - lot_id = db.Column(db.UUID(as_uuid=True), db.ForeignKey(Lot.id), nullable=False, index=True) + lot_id = db.Column(db.UUID(as_uuid=True), db.ForeignKey(Lot.id), nullable=False) lot = db.relationship(Lot, backref=db.backref('paths', lazy=True, @@ -199,7 +199,8 @@ class Path(db.Model): # dag.delete_edge needs to disable internally/temporarily the unique constraint db.UniqueConstraint(path, name='path_unique', deferrable=True, initially='immediate'), db.Index('path_gist', path, postgresql_using='gist'), - db.Index('path_btree', path, postgresql_using='btree') + db.Index('path_btree', path, postgresql_using='btree'), + db.Index('lot_id_index', lot_id, postgresql_using='hash') ) def __init__(self, lot: Lot) -> None: diff --git a/ereuse_devicehub/resources/lot/views.py b/ereuse_devicehub/resources/lot/views.py index eb272c34..a5d45c22 100644 --- a/ereuse_devicehub/resources/lot/views.py +++ b/ereuse_devicehub/resources/lot/views.py @@ -41,7 +41,8 @@ class LotView(View): return ret def patch(self, id): - l = request.get_json() + patch_schema = self.resource_def.SCHEMA(only=('name', 'description'), partial=True) + l = request.get_json(schema=patch_schema) lot = Lot.query.filter_by(id=id).one() for key, value in l.items(): setattr(lot, key, value) diff --git a/ereuse_devicehub/resources/search.py b/ereuse_devicehub/resources/search.py index 50b272b1..4a18dfee 100644 --- a/ereuse_devicehub/resources/search.py +++ b/ereuse_devicehub/resources/search.py @@ -30,12 +30,12 @@ class Search: @staticmethod def match(column: db.Column, search: str, lang=LANG): """Query that matches a TSVECTOR column with search words.""" - return column.op('@@')(db.func.plainto_tsquery(lang, search)) + return column.op('@@')(db.func.websearch_to_tsquery(lang, search)) @staticmethod def rank(column: db.Column, search: str, lang=LANG): """Query that ranks a TSVECTOR column with search words.""" - return db.func.ts_rank(column, db.func.plainto_tsquery(lang, search)) + return db.func.ts_rank(column, db.func.websearch_to_tsquery(lang, search)) @staticmethod def _vectorize(col: db.Column, weight: Weight = Weight.D, lang=LANG): diff --git a/ereuse_devicehub/resources/tag/model.py b/ereuse_devicehub/resources/tag/model.py index 367db2f1..b0502a44 100644 --- a/ereuse_devicehub/resources/tag/model.py +++ b/ereuse_devicehub/resources/tag/model.py @@ -44,8 +44,7 @@ class Tag(Thing): """ device_id = Column(BigInteger, # We don't want to delete the tag on device deletion, only set to null - ForeignKey(Device.id, ondelete=DB_CASCADE_SET_NULL), - index=True) + ForeignKey(Device.id, ondelete=DB_CASCADE_SET_NULL)) device = relationship(Device, backref=backref('tags', lazy=True, collection_class=Tags), primaryjoin=Device.id == device_id) @@ -56,6 +55,10 @@ class Tag(Thing): constraints as the main one. Only needed in special cases. """ + __table_args__ = ( + db.Index('device_id_index', device_id, postgresql_using='hash'), + ) + def __init__(self, id: str, **kwargs) -> None: super().__init__(id=id, **kwargs) diff --git a/examples/create-db.sh b/examples/create-db.sh index f9848904..89c532ab 100644 --- a/examples/create-db.sh +++ b/examples/create-db.sh @@ -11,3 +11,4 @@ psql -d $1 -c "GRANT ALL PRIVILEGES ON DATABASE $1 TO $2;" # Give access to the psql -d $1 -c "CREATE EXTENSION pgcrypto SCHEMA public;" # Enable pgcrypto psql -d $1 -c "CREATE EXTENSION ltree SCHEMA public;" # Enable ltree psql -d $1 -c "CREATE EXTENSION citext SCHEMA public;" # Enable citext +psql -d $1 -c "CREATE EXTENSION pg_trgm SCHEMA public;" # Enable pg_trgm diff --git a/tests/test_lot.py b/tests/test_lot.py index 21799a05..937e1db8 100644 --- a/tests/test_lot.py +++ b/tests/test_lot.py @@ -75,6 +75,7 @@ def test_lot_modify_patch_endpoint_and_delete(user: UserClient): l_after, _ = user.get(res=Lot, item=l['id']) assert l_after['name'] == 'bar' assert l_after['description'] == 'bax' + user.patch({'description': 'bax'}, res=Lot, item=l['id'], status=204) user.delete(res=Lot, item=l['id'], status=204) user.get(res=Lot, item=l['id'], status=404)