Docstrings in storage.stats.
Signed-off-by: Olivier Wilkinson (reivilibre) <olivier@librepush.net>
parent 314567d62d
commit 182cdcbf24
1 changed file with 154 additions and 22 deletions: synapse/storage/stats.py
@@ -36,7 +36,7 @@ ABSOLUTE_STATS_FIELDS = {
         "invited_members",
         "left_members",
         "banned_members",
-        "total_events",  # TODO review this list
+        "total_events",
     ),
     "user": ("public_rooms", "private_rooms"),
 }
@@ -78,6 +78,16 @@ class StatsStore(StateDeltasStore):
         )

+    def quantise_stats_time(self, ts):
+        """
+        Quantises a timestamp to be a multiple of the bucket size.
+
+        Args:
+            ts: the timestamp to quantise, in seconds since the Unix Epoch
+
+        Returns:
+            the largest multiple of the bucket size which is
+            no later than `ts`.
+        """
+        return (ts // self.stats_bucket_size) * self.stats_bucket_size
+
     @defer.inlineCallbacks
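Editor's note: the quantisation above is plain integer floor division. A minimal illustration, with an assumed bucket size of 86400 seconds (one day); the values here are invented and not part of the commit:

    # Not part of the diff: illustrates quantise_stats_time's arithmetic.
    stats_bucket_size = 86400  # assumed example bucket size, in seconds
    ts = 1_000_000
    quantised = (ts // stats_bucket_size) * stats_bucket_size
    assert quantised == 950_400  # largest multiple of 86400 no later than ts
    assert quantised <= ts
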
@@ -120,6 +130,10 @@ class StatsStore(StateDeltasStore):

     @defer.inlineCallbacks
     def _populate_stats_prepare(self, progress, batch_size):
+        """
+        This is a background update which prepares the database for
+        statistics regeneration.
+        """

         if not self.stats_enabled:
             yield self._end_background_update("populate_stats_prepare")
@@ -167,6 +181,9 @@ class StatsStore(StateDeltasStore):
                 txn.execute(statement)

         def _delete_dirty_skeletons(txn):
+            """
+            Delete pre-existing rows which are incomplete.
+            """
             sqls = """
                 DELETE FROM room_stats_current
                 WHERE completed_delta_stream_id IS NULL;
@@ -202,14 +219,20 @@ class StatsStore(StateDeltasStore):

     @defer.inlineCallbacks
     def _populate_stats_cleanup(self, progress, batch_size):
+        """
+        This is a background update which cleans up after statistics regeneration.
+        """
         # TODO is there really no clean-up to be done?

-        # TODO if not self.stats_enabled ….
+        # TODO if not self.stats_enabled … cleanup.
         yield self._end_background_update("populate_stats_cleanup")
         defer.returnValue(1)

     @defer.inlineCallbacks
     def _populate_stats_process_users(self, progress, batch_size):
+        """
+        This is a background update which regenerates statistics for users.
+        """
         if not self.stats_enabled:
             yield self._end_background_update("populate_stats_process_users")
             defer.returnValue(1)
@@ -307,7 +330,6 @@ class StatsStore(StateDeltasStore):
                     "private_rooms": room_counts_by_publicness.get(False, 0),
                 },
             )
-            # TODO CHECK: actually want to **overwrite** some of these!
         except OldCollectionRequired:
             # this can't (shouldn't) actually happen
             # since we only run the background update for incomplete rows
@@ -347,6 +369,9 @@ class StatsStore(StateDeltasStore):

     @defer.inlineCallbacks
     def _populate_stats_process_rooms(self, progress, batch_size):
+        """
+        This is a background update which regenerates statistics for rooms.
+        """
         if not self.stats_enabled:
             yield self._end_background_update("populate_stats_process_rooms")
             defer.returnValue(1)
@@ -490,7 +515,6 @@ class StatsStore(StateDeltasStore):
                     "banned_members": membership_counts.get(Membership.BAN, 0),
                 },
             )
-            # TODO CHECK: actually want to **overwrite** some of these!
         except OldCollectionRequired:
             # this can't (shouldn't) actually happen
             # since we only run the background update for incomplete rows
@@ -581,6 +605,18 @@ class StatsStore(StateDeltasStore):
                 continue

     def _count_events_in_room_txn(self, txn, room_id, low_token, high_token):
+        """
+        Count the number of events in a room between two tokens, inclusive.
+        Args:
+            txn (cursor): The database cursor
+            room_id (str): The ID of the room to count events for
+            low_token (int): the minimum stream ordering to count
+            high_token (int): the maximum stream ordering to count
+
+        Returns (int):
+            the number of events
+        """
+
         sql = """
             SELECT COUNT(*) AS num_events
             FROM events
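Editor's note: the hunk cuts off inside the SQL string. Judging purely from the docstring, a plausible completion of the method body would be the following sketch; the WHERE clause and the fetch are inferred, not shown in this diff:

    # Hypothetical sketch of the rest of _count_events_in_room_txn:
    sql = """
        SELECT COUNT(*) AS num_events
        FROM events
        WHERE room_id = ?
            AND stream_ordering >= ?
            AND stream_ordering <= ?
    """
    txn.execute(sql, (room_id, low_token, high_token))
    return txn.fetchone()[0]
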
@@ -595,6 +631,7 @@ class StatsStore(StateDeltasStore):
         """
         Delete all statistics records.
+        TODO obsolete?
         TODO at least will need updating
         """

         def _delete_all_stats_txn(txn):
@@ -606,6 +643,23 @@ class StatsStore(StateDeltasStore):
         return self.runInteraction("delete_all_stats", _delete_all_stats_txn)

     def get_stats_positions(self, for_initial_processor=False):
+        """
+        Returns the stats processor positions.
+
+        Args:
+            for_initial_processor (bool, optional): If true, returns the position
+                promised by the latest stats regeneration, rather than the current
+                incremental processor's position.
+                Otherwise (if false), return the incremental processor's position.
+
+        Returns (dict):
+            Dict containing:
+            state_delta_stream_id: stream_id of the last-processed state delta
+            total_events_min_stream_ordering: stream_ordering of the latest-processed
+                backfilled event, in the context of total_events counting.
+            total_events_max_stream_ordering: stream_ordering of the latest-processed
+                non-backfilled event, in the context of total_events counting.
+        """
         return self._simple_select_one(
             table="stats_incremental_position",
             keyvalues={"is_background_contract": for_initial_processor},
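Editor's note: for reference, a positions dict matching the docstring above would look something like this; the values are invented for illustration:

    # Hypothetical example of a positions dict, per the docstring above.
    positions = {
        "state_delta_stream_id": 12345,           # last state delta processed
        "total_events_min_stream_ordering": -50,  # backfilled events have negative orderings
        "total_events_max_stream_ordering": 67890,
    }
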
@@ -618,6 +672,12 @@ class StatsStore(StateDeltasStore):
         )

     def _get_stats_positions_txn(self, txn, for_initial_processor=False):
+        """
+        See L{get_stats_positions}.
+
+        Args:
+            txn (cursor): Database cursor
+        """
         return self._simple_select_one_txn(
             txn=txn,
             table="stats_incremental_position",
@@ -630,6 +690,13 @@ class StatsStore(StateDeltasStore):
         )

     def update_stats_positions(self, positions, for_initial_processor=False):
+        """
+        Updates the stats processor positions.
+
+        Args:
+            positions: See L{get_stats_positions}
+            for_initial_processor: See L{get_stats_positions}
+        """
         if positions is None:
             positions = {
                 "state_delta_stream_id": None,
@@ -644,6 +711,9 @@ class StatsStore(StateDeltasStore):
         )

     def _update_stats_positions_txn(self, txn, positions, for_initial_processor=False):
+        """
+        See L{update_stats_positions}
+        """
         if positions is None:
             positions = {
                 "state_delta_stream_id": None,
@@ -709,10 +779,12 @@ class StatsStore(StateDeltasStore):
             size,
         )

     # TODO fix and account for _current.
     def _get_statistics_for_subject_txn(
         self, txn, stats_type, stats_id, start, size=100
     ):
+        """
+        Transaction-bound version of L{get_statistics_for_subject}.
+        """
+
         table, id_col = TYPE_TO_TABLE[stats_type]
         selected_columns = list(
@@ -740,11 +812,18 @@ class StatsStore(StateDeltasStore):
             allow_none=True,
         )

-        if current is not None and current["end_ts"] is not None:
-            # it is dirty, so contains new information, so should be included
-            current["bucket_size"] = current["end_ts"] - current["start_ts"]
-            del current["start_ts"]
-            return [current] + slice_list
+        if current is not None:
+            completed = current["completed_delta_stream_id"] is not None
+            dirty = current["end_ts"] is not None
+
+            if completed and dirty:
+                # it is dirty, so contains new information, so should be included
+                # we don't accept incomplete rows as that would almost certainly
+                # be giving misinformation, since it is awaiting an
+                # initial background count
+                current["bucket_size"] = current["end_ts"] - current["start_ts"]
+                del current["start_ts"]
+                return [current] + slice_list
         return slice_list

     def get_all_room_state(self):
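Editor's note: the new row-inclusion rule above can be summarised as a small decision table; this is a restatement of the committed logic, not code from the commit:

    # completed | dirty | outcome for the _current row
    # ----------+-------+------------------------------------------------
    #    yes    |  yes  | include: finished initial count, has new data
    #    yes    |  no   | skip: nothing newer than the historical rows
    #    no     |   -   | skip: still awaiting its initial background count
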
@@ -753,6 +832,17 @@ class StatsStore(StateDeltasStore):
         )

     def get_room_state(self, room_id):
+        """
+        Returns the current room_state for a room.
+
+        Args:
+            room_id (str): The ID of the room to return state for.
+
+        Returns (dict):
+            Dictionary containing these keys:
+                "name", "topic", "canonical_alias", "avatar", "join_rules",
+                "history_visibility"
+        """
         return self._simple_select_one(
             "room_state",
             {"room_id": room_id},
@@ -787,6 +877,14 @@ class StatsStore(StateDeltasStore):
         )

     def _collect_old_txn(self, txn, stats_type, limit=500):
+        """
+        See L{collect_old}. Runs only a small batch, specified by limit.
+
+        Returns (bool):
+            True iff there is possibly more to do (i.e. this needs re-running),
+            False otherwise.
+
+        """
         # we do them in batches to prevent concurrent updates from
         # messing us over with lots of retries
@@ -801,11 +899,12 @@ class StatsStore(StateDeltasStore):
             )
         )

-        sql = ("SELECT %s FROM %s_current WHERE end_ts <= ? LIMIT %d FOR UPDATE") % (
-            id_col,
-            table,
-            limit,
-        )
+        # `end_ts IS NOT NULL` is for partial index optimisation
+        sql = (
+            "SELECT %s FROM %s_current"
+            " WHERE end_ts <= ? AND end_ts IS NOT NULL"
+            " LIMIT %d FOR UPDATE"
+        ) % (id_col, table, limit)
         txn.execute(sql, (quantised_ts,))
         maybe_more = txn.rowcount == limit
         updates = txn.fetchall()
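Editor's note: the `end_ts IS NOT NULL` predicate only pays off if a matching partial index exists. A sketch of the kind of index the new comment presumably refers to; the index name and exact definition are assumptions, not shown in this diff:

    # Hypothetical partial index enabling the optimisation above:
    CREATE_CURRENT_DIRTY_INDEX = """
        CREATE INDEX room_stats_current_dirty
            ON room_stats_current (end_ts)
            WHERE end_ts IS NOT NULL
    """
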
@@ -827,6 +926,19 @@ class StatsStore(StateDeltasStore):

     @defer.inlineCallbacks
     def collect_old(self, stats_type):
+        """
+        Run 'old collection' on current stats rows.
+
+        Old collection is the process of copying dirty (updated) stats rows
+        from the current table to the historical table, when those rows have
+        finished their stats time slice.
+        Collected rows are then cleared of their dirty status.
+
+        Args:
+            stats_type: "room" or "user" – the type of stats to run old collection
+                on.
+
+        """
         while True:
             maybe_more = yield self.runInteraction(
                 "stats_collect_old", self._collect_old_txn, stats_type
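Editor's note: the hunk ends before the loop's exit condition. Given that _collect_old_txn returns True iff a full batch was consumed, the remainder is presumably the usual batch-until-done pattern; a sketch, not shown in this diff:

    # Hypothetical loop tail: stop once a partial (non-full) batch is seen.
    if not maybe_more:
        return
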
@@ -839,16 +951,18 @@ class StatsStore(StateDeltasStore):
         self, ts, stats_type, stats_id, fields, complete_with_stream_id=None
     ):
         """
         Updates the statistics for a subject, with a delta (difference/relative
         change).

         Args:
-            ts (int):
-            stats_type (str):
-            stats_id (str):
+            ts (int): timestamp of the change
+            stats_type (str): "room" or "user" – the kind of subject
+            stats_id (str): the subject's ID (room ID or user ID)
             fields (dict[str, int]): Deltas of stats values.
             complete_with_stream_id (int, optional):
-
-        Returns:
-
+                If supplied, converts an incomplete row into a complete row,
+                with the supplied stream_id marked as the stream_id where the
+                row was completed.
         """

         while True:
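Editor's note: to make the delta semantics concrete, a hypothetical call might look like the following; the names and values are invented for illustration:

    # Hypothetical usage of update_stats_delta: one user joined, one
    # invitation resolved, in some room.
    yield self.update_stats_delta(
        ts=now,  # assumed current-timestamp variable
        stats_type="room",
        stats_id="!room:example.com",
        fields={"joined_members": +1, "invited_members": -1},
    )
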
@@ -877,6 +991,11 @@ class StatsStore(StateDeltasStore):
         complete_with_stream_id=None,
         absolute_fields=None,
     ):
+        """
+        See L{update_stats_delta}
+        Additional Args:
+            absolute_fields (dict[str, int]): Absolute stats values (i.e. not deltas).
+        """
         table, id_col = TYPE_TO_TABLE[stats_type]

         quantised_ts = self.quantise_stats_time(int(ts))
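Editor's note: the distinction between the two field dicts documented above, restated with invented values:

    # Illustration only: deltas adjust a counter, absolutes overwrite it.
    fields = {"total_events": 3}              # add 3 to the current count
    absolute_fields = {"joined_members": 12}  # set the count to exactly 12
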
@@ -923,7 +1042,7 @@ class StatsStore(StateDeltasStore):
             insertee = {
                 id_col: stats_id,
                 "end_ts": end_ts,
-                "start_ts": ts,  # TODO or do we use qts?
+                "start_ts": ts,
                 "completed_delta_stream_id": complete_with_stream_id,
             }
@@ -968,6 +1087,19 @@ class StatsStore(StateDeltasStore):
             raise OldCollectionRequired()

     def incremental_update_total_events(self, in_positions):
+        """
+        Counts the number of events per-room and then adds these to the respective
+        total_events room counts.
+
+        Args:
+            in_positions (dict): Positions, as retrieved from
+                L{get_stats_positions}.
+
+        Returns (dict):
+            The new positions. Note that this is for reference only –
+            the new positions WILL be committed by this function.
+        """
+
         def incremental_update_total_events_txn(txn):
             positions = in_positions.copy()