devicehub-teal/ereuse_devicehub/resources/device/search.py

from itertools import chain

import inflection
from sqlalchemy.dialects import postgresql
from sqlalchemy.dialects.postgresql import TSVECTOR
from sqlalchemy.orm import aliased

from ereuse_devicehub.db import db
from ereuse_devicehub.resources import search
from ereuse_devicehub.resources.agent.models import Organization
from ereuse_devicehub.resources.device.models import Component, Computer, Device
from ereuse_devicehub.resources.event.models import Event, EventWithMultipleDevices, \
    EventWithOneDevice
from ereuse_devicehub.resources.tag.model import Tag


class DeviceSearch(db.Model):
    """Temporary table that stores full-text device documents.

    It provides methods to auto-run
    """
    device_id = db.Column(db.BigInteger,
                          db.ForeignKey(Device.id, ondelete='CASCADE'),
                          primary_key=True)
    device = db.relationship(Device, primaryjoin=Device.id == device_id)

    properties = db.Column(TSVECTOR, nullable=False)
    tags = db.Column(TSVECTOR)

    __table_args__ = (
        # todo to add concurrency this should be commited separately
        #   see https://docs.sqlalchemy.org/en/latest/dialects/postgresql.html#indexes-with-concurrently
        db.Index('properties gist', properties, postgresql_using='gist'),
        db.Index('tags gist', tags, postgresql_using='gist'),
        {
            'prefixes': ['UNLOGGED']
            # Only for temporal tables, can cause table to empty on turn on
        }

    )

    @classmethod
    def update_modified_devices(cls, session: db.Session):
        """Updates the documents of the devices that are part of a
        modified event, or tag in the passed-in session.

        This method is registered as a SQLAlchemy listener in the
        Devicehub class.
        """
        devices_to_update = set()
        for model in chain(session.new, session.dirty):
            if isinstance(model, Event):
                if isinstance(model, EventWithMultipleDevices):
                    devices_to_update |= model.devices
                elif isinstance(model, EventWithOneDevice):
                    devices_to_update.add(model.device)
                if model.parent:
                    devices_to_update.add(model.parent)
                devices_to_update |= model.components
            elif isinstance(model, Tag) and model.device:
                devices_to_update.add(model.device)

        # this flush is controversial:
        # see https://groups.google.com/forum/#!topic/sqlalchemy/hBzfypgPfYo
        # todo probably should replace it with what the solution says
        session.flush()
        for device in (d for d in devices_to_update if not isinstance(d, Component)):
            cls.set_device_tokens(session, device)

    @classmethod
    def set_all_devices_tokens_if_empty(cls, session: db.Session):
        """Generates the search docs if the table is empty.

        This can happen if Postgres' shut down unexpectedly, as
        it deletes unlogged tables as ours.
        """
        if not DeviceSearch.query.first():
            cls.regenerate_search_table(session)

    @classmethod
    def regenerate_search_table(cls, session: db.Session):
        """Deletes and re-computes all the search table."""
        DeviceSearch.query.delete()
        for device in Device.query:
            if not isinstance(device, Component):
                cls.set_device_tokens(session, device)

    @classmethod
    def set_device_tokens(cls, session: db.Session, device: Device):
        """(Re)Generates the device search tokens."""
        assert not isinstance(device, Component)

        tokens = [
            (str(device.id), search.Weight.A),
            (inflection.humanize(device.type), search.Weight.B),
            (Device.model, search.Weight.B),
            (Device.manufacturer, search.Weight.C),
            (Device.serial_number, search.Weight.A)
        ]

        if device.manufacturer:
            # todo this has to be done using a dictionary
            manufacturer = device.manufacturer.lower()
            if 'asus' in manufacturer:
                tokens.append(('asus', search.Weight.B))
            if 'hewlett' in manufacturer or 'hp' in manufacturer or 'h.p' in manufacturer:
                tokens.append(('hp', search.Weight.B))
                tokens.append(('h.p', search.Weight.C))
                tokens.append(('hewlett', search.Weight.C))
                tokens.append(('packard', search.Weight.C))

        if isinstance(device, Computer):
            # Aggregate the values of all the components of pc
            Comp = aliased(Component)
            tokens.extend((
                (db.func.string_agg(db.cast(Comp.id, db.TEXT), ' '), search.Weight.D),
                (db.func.string_agg(Comp.model, ' '), search.Weight.C),
                (db.func.string_agg(Comp.manufacturer, ' '), search.Weight.D),
                (db.func.string_agg(Comp.serial_number, ' '), search.Weight.B),
                (db.func.string_agg(Comp.type, ' '), search.Weight.B),
                ('Computer', search.Weight.C),
                ('PC', search.Weight.C),
                (inflection.humanize(device.chassis.name), search.Weight.B),
            ))

        properties = session \
            .query(search.Search.vectorize(*tokens)) \
            .filter(Device.id == device.id)

        if isinstance(device, Computer):
            # Join to components
            properties = properties \
                .outerjoin(Comp, Computer.components) \
                .group_by(Device.id)

        tags = session.query(
            search.Search.vectorize(
                (db.func.string_agg(Tag.id, ' '), search.Weight.A),
                (db.func.string_agg(Tag.secondary, ' '), search.Weight.A),
                (db.func.string_agg(Organization.name, ' '), search.Weight.B)
            )
        ).filter(Tag.device_id == device.id).join(Tag.org)

        # Note that commit flushes later
        # todo see how to get rid of the one_or_none() by embedding those as subqueries
        # I don't like this but I want the 'on_conflict_on_update' thingie
        device_document = dict(properties=properties.one_or_none(), tags=tags.one_or_none())
        insert = postgresql.insert(DeviceSearch.__table__) \
            .values(device_id=device.id, **device_document) \
            .on_conflict_do_update(constraint='device_search_pkey', set_=device_document)
        session.execute(insert)