Use Postgres 11; enhance search query with websearch; really use indexes, add hash index; fix partially patching lots

This commit is contained in:
Xavier Bustamante Talavera 2019-02-07 13:47:42 +01:00
parent 04358a5506
commit 6c4c89ac48
12 changed files with 69 additions and 37 deletions

View file

@ -23,11 +23,8 @@ Devicehub is built with [Teal](https://github.com/bustawin/teal) and
The requirements are:
- Python 3.5.3 or higher. In debian 9 is `# apt install python3-pip`.
- PostgreSQL 9.6 or higher with pgcrypto and ltree.
In debian 9 is `# apt install postgresql-contrib`
- passlib. In debian 9 is `# apt install python3-passlib`.
- Weasyprint requires some system packages.
[Their docs explain which ones and how to install them](http://weasyprint.readthedocs.io/en/stable/install.html).
- [PostgreSQL 11 or higher](https://www.postgresql.org/download/).
- Weasyprint [dependencies](http://weasyprint.readthedocs.io/en/stable/install.html).
Install Devicehub with *pip*: `pip3 install ereuse-devicehub -U --pre`.

View file

@ -27,7 +27,7 @@ class JoinedTableMixin:
class Agent(Thing):
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid4)
type = Column(Unicode, nullable=False, index=True)
type = Column(Unicode, nullable=False)
name = Column(CIText())
name.comment = """
The name of the organization or person.
@ -46,7 +46,8 @@ class Agent(Thing):
__table_args__ = (
UniqueConstraint(tax_id, country, name='Registration Number per country.'),
UniqueConstraint(tax_id, name, name='One tax ID with one name.')
UniqueConstraint(tax_id, name, name='One tax ID with one name.'),
db.Index('agent_type', type, postgresql_using='hash')
)
@declared_attr

View file

@ -7,7 +7,7 @@ from typing import Dict, List, Set
from boltons import urlutils
from citext import CIText
from ereuse_utils.naming import Naming, HID_CONVERSION_DOC
from ereuse_utils.naming import HID_CONVERSION_DOC, Naming
from more_itertools import unique_everseen
from sqlalchemy import BigInteger, Boolean, Column, Enum as DBEnum, Float, ForeignKey, Integer, \
Sequence, SmallInteger, Unicode, inspect, text
@ -49,7 +49,7 @@ class Device(Thing):
The identifier of the device for this database. Used only
internally for software; users should not use this.
"""
type = Column(Unicode(STR_SM_SIZE), nullable=False, index=True)
type = Column(Unicode(STR_SM_SIZE), nullable=False)
hid = Column(Unicode(), check_lower('hid'), unique=True)
hid.comment = """
The Hardware ID (HID) is the unique ID traceability systems
@ -110,6 +110,11 @@ class Device(Thing):
'weight'
}
__table_args__ = (
db.Index('device_id', id, postgresql_using='hash'),
db.Index('type_index', type, postgresql_using='hash')
)
def __init__(self, **kw) -> None:
super().__init__(**kw)
with suppress(TypeError):
@ -476,7 +481,7 @@ class Component(Device):
"""A device that can be inside another device."""
id = Column(BigInteger, ForeignKey(Device.id), primary_key=True)
parent_id = Column(BigInteger, ForeignKey(Computer.id), index=True)
parent_id = Column(BigInteger, ForeignKey(Computer.id))
parent = relationship(Computer,
backref=backref('components',
lazy=True,
@ -485,6 +490,10 @@ class Component(Device):
collection_class=OrderedSet),
primaryjoin=parent_id == Computer.id)
__table_args__ = (
db.Index('parent_index', parent_id, postgresql_using='hash'),
)
def similar_one(self, parent: Computer, blacklist: Set[int]) -> 'Component':
"""
Gets a component that:
@ -720,19 +729,21 @@ class Manufacturer(db.Model):
Ideally users should use the names from this list when submitting
devices.
"""
__table_args__ = {'schema': 'common'}
CSV_DELIMITER = csv.get_dialect('excel').delimiter
name = db.Column(CIText(),
primary_key=True,
# from https://niallburkley.com/blog/index-columns-for-like-in-postgres/
index=db.Index('name', text('name gin_trgm_ops'), postgresql_using='gin'))
name = db.Column(CIText(), primary_key=True)
name.comment = """The normalized name of the manufacturer."""
url = db.Column(URL(), unique=True)
url.comment = """An URL to a page describing the manufacturer."""
logo = db.Column(URL())
logo.comment = """An URL pointing to the logo of the manufacturer."""
__table_args__ = (
# from https://niallburkley.com/blog/index-columns-for-like-in-postgres/
db.Index('name_index', text('name gin_trgm_ops'), postgresql_using='gin'),
{'schema': 'common'}
)
@classmethod
def add_all_to_session(cls, session: db.Session):
"""Adds all manufacturers to session."""

View file

@ -24,18 +24,20 @@ class DeviceSearch(db.Model):
primary_key=True)
device = db.relationship(Device, primaryjoin=Device.id == device_id)
properties = db.Column(TSVECTOR,
nullable=False,
index=db.Index('properties gist',
postgresql_using='gist',
postgresql_concurrently=True))
tags = db.Column(TSVECTOR, index=db.Index('tags gist',
postgresql_using='gist',
postgresql_concurrently=True))
properties = db.Column(TSVECTOR, nullable=False)
tags = db.Column(TSVECTOR)
__table_args__ = {
'prefixes': ['UNLOGGED'] # Only for temporal tables, can cause table to empty on turn on
}
__table_args__ = (
# todo to add concurrency this should be commited separately
# see https://docs.sqlalchemy.org/en/latest/dialects/postgresql.html#indexes-with-concurrently
db.Index('properties gist', properties, postgresql_using='gist'),
db.Index('tags gist', tags, postgresql_using='gist'),
{
'prefixes': ['UNLOGGED']
# Only for temporal tables, can cause table to empty on turn on
}
)
@classmethod
def update_modified_devices(cls, session: db.Session):

View file

@ -49,7 +49,7 @@ class Event(Thing):
This class extends `Schema's Action <https://schema.org/Action>`_.
"""
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid4)
type = Column(Unicode, nullable=False, index=True)
type = Column(Unicode, nullable=False)
name = Column(CIText(), default='', nullable=False)
name.comment = """
A name or title for the event. Used when searching for events.
@ -146,7 +146,7 @@ class Event(Thing):
For Add and Remove though, this has another meaning: the components
that are added or removed.
"""
parent_id = Column(BigInteger, ForeignKey(Computer.id), index=True)
parent_id = Column(BigInteger, ForeignKey(Computer.id))
parent = relationship(Computer,
backref=backref('events_parent',
lazy=True,
@ -161,6 +161,12 @@ class Event(Thing):
would point to the computer that contained this data storage, if any.
"""
__table_args__ = (
db.Index('ix_id', id, postgresql_using='hash'),
db.Index('ix_type', type, postgresql_using='hash'),
db.Index('ix_parent_id', parent_id, postgresql_using='hash')
)
@property
def elapsed(self):
"""Returns the elapsed time with seconds precision."""
@ -230,7 +236,7 @@ class JoinedWithOneDeviceMixin:
class EventWithOneDevice(JoinedTableMixin, Event):
device_id = Column(BigInteger, ForeignKey(Device.id), nullable=False, index=True)
device_id = Column(BigInteger, ForeignKey(Device.id), nullable=False)
device = relationship(Device,
backref=backref('events_one',
lazy=True,
@ -239,6 +245,10 @@ class EventWithOneDevice(JoinedTableMixin, Event):
collection_class=OrderedSet),
primaryjoin=Device.id == device_id)
__table_args__ = (
db.Index('event_one_device_id_index', device_id, postgresql_using='hash'),
)
def __repr__(self) -> str:
return '<{0.t} {0.id} {0.severity} device={0.device!r}>'.format(self)

View file

@ -6,7 +6,6 @@ from ereuse_devicehub.resources.models import Thing
class Inventory(Thing):
__table_args__ = {'schema': 'common'}
id = db.Column(db.Unicode(), primary_key=True)
id.comment = """The name of the inventory as in the URL and schema."""
name = db.Column(db.CIText(), nullable=False, unique=True)
@ -16,6 +15,11 @@ class Inventory(Thing):
tag_token.comment = """The token to access a Tag service."""
org_id = db.Column(db.UUID(as_uuid=True), db.ForeignKey('organization.id'), nullable=False)
__table_args__ = (
db.Index('id_hash', id, postgresql_using='hash'),
{'schema': 'common'}
)
@classproperty
def current(cls) -> 'Inventory':
"""The inventory of the current_app."""

View file

@ -182,7 +182,7 @@ class Path(db.Model):
id = db.Column(db.UUID(as_uuid=True),
primary_key=True,
server_default=db.text('gen_random_uuid()'))
lot_id = db.Column(db.UUID(as_uuid=True), db.ForeignKey(Lot.id), nullable=False, index=True)
lot_id = db.Column(db.UUID(as_uuid=True), db.ForeignKey(Lot.id), nullable=False)
lot = db.relationship(Lot,
backref=db.backref('paths',
lazy=True,
@ -199,7 +199,8 @@ class Path(db.Model):
# dag.delete_edge needs to disable internally/temporarily the unique constraint
db.UniqueConstraint(path, name='path_unique', deferrable=True, initially='immediate'),
db.Index('path_gist', path, postgresql_using='gist'),
db.Index('path_btree', path, postgresql_using='btree')
db.Index('path_btree', path, postgresql_using='btree'),
db.Index('lot_id_index', lot_id, postgresql_using='hash')
)
def __init__(self, lot: Lot) -> None:

View file

@ -41,7 +41,8 @@ class LotView(View):
return ret
def patch(self, id):
l = request.get_json()
patch_schema = self.resource_def.SCHEMA(only=('name', 'description'), partial=True)
l = request.get_json(schema=patch_schema)
lot = Lot.query.filter_by(id=id).one()
for key, value in l.items():
setattr(lot, key, value)

View file

@ -30,12 +30,12 @@ class Search:
@staticmethod
def match(column: db.Column, search: str, lang=LANG):
"""Query that matches a TSVECTOR column with search words."""
return column.op('@@')(db.func.plainto_tsquery(lang, search))
return column.op('@@')(db.func.websearch_to_tsquery(lang, search))
@staticmethod
def rank(column: db.Column, search: str, lang=LANG):
"""Query that ranks a TSVECTOR column with search words."""
return db.func.ts_rank(column, db.func.plainto_tsquery(lang, search))
return db.func.ts_rank(column, db.func.websearch_to_tsquery(lang, search))
@staticmethod
def _vectorize(col: db.Column, weight: Weight = Weight.D, lang=LANG):

View file

@ -44,8 +44,7 @@ class Tag(Thing):
"""
device_id = Column(BigInteger,
# We don't want to delete the tag on device deletion, only set to null
ForeignKey(Device.id, ondelete=DB_CASCADE_SET_NULL),
index=True)
ForeignKey(Device.id, ondelete=DB_CASCADE_SET_NULL))
device = relationship(Device,
backref=backref('tags', lazy=True, collection_class=Tags),
primaryjoin=Device.id == device_id)
@ -56,6 +55,10 @@ class Tag(Thing):
constraints as the main one. Only needed in special cases.
"""
__table_args__ = (
db.Index('device_id_index', device_id, postgresql_using='hash'),
)
def __init__(self, id: str, **kwargs) -> None:
super().__init__(id=id, **kwargs)

View file

@ -11,3 +11,4 @@ psql -d $1 -c "GRANT ALL PRIVILEGES ON DATABASE $1 TO $2;" # Give access to the
psql -d $1 -c "CREATE EXTENSION pgcrypto SCHEMA public;" # Enable pgcrypto
psql -d $1 -c "CREATE EXTENSION ltree SCHEMA public;" # Enable ltree
psql -d $1 -c "CREATE EXTENSION citext SCHEMA public;" # Enable citext
psql -d $1 -c "CREATE EXTENSION pg_trgm SCHEMA public;" # Enable pg_trgm

View file

@ -75,6 +75,7 @@ def test_lot_modify_patch_endpoint_and_delete(user: UserClient):
l_after, _ = user.get(res=Lot, item=l['id'])
assert l_after['name'] == 'bar'
assert l_after['description'] == 'bax'
user.patch({'description': 'bax'}, res=Lot, item=l['id'], status=204)
user.delete(res=Lot, item=l['id'], status=204)
user.get(res=Lot, item=l['id'], status=404)