mirror of
https://github.com/element-hq/synapse.git
synced 2024-11-29 15:39:00 +03:00
Merge pull request #911 from matrix-org/erikj/purge_history
Feature: Purge local room history.
This commit is contained in:
commit
70d650be2b
7 changed files with 389 additions and 37 deletions
|
@ -1413,7 +1413,7 @@ class FederationHandler(BaseHandler):
|
||||||
local_view = dict(auth_events)
|
local_view = dict(auth_events)
|
||||||
remote_view = dict(auth_events)
|
remote_view = dict(auth_events)
|
||||||
remote_view.update({
|
remote_view.update({
|
||||||
(d.type, d.state_key): d for d in different_events
|
(d.type, d.state_key): d for d in different_events if d
|
||||||
})
|
})
|
||||||
|
|
||||||
new_state, prev_state = self.state_handler.resolve_events(
|
new_state, prev_state = self.state_handler.resolve_events(
|
||||||
|
|
|
@ -26,7 +26,7 @@ from synapse.types import (
|
||||||
UserID, RoomAlias, RoomStreamToken, StreamToken, get_domain_from_id
|
UserID, RoomAlias, RoomStreamToken, StreamToken, get_domain_from_id
|
||||||
)
|
)
|
||||||
from synapse.util import unwrapFirstError
|
from synapse.util import unwrapFirstError
|
||||||
from synapse.util.async import concurrently_execute, run_on_reactor
|
from synapse.util.async import concurrently_execute, run_on_reactor, ReadWriteLock
|
||||||
from synapse.util.caches.snapshot_cache import SnapshotCache
|
from synapse.util.caches.snapshot_cache import SnapshotCache
|
||||||
from synapse.util.logcontext import preserve_fn
|
from synapse.util.logcontext import preserve_fn
|
||||||
from synapse.visibility import filter_events_for_client
|
from synapse.visibility import filter_events_for_client
|
||||||
|
@ -50,6 +50,20 @@ class MessageHandler(BaseHandler):
|
||||||
self.validator = EventValidator()
|
self.validator = EventValidator()
|
||||||
self.snapshot_cache = SnapshotCache()
|
self.snapshot_cache = SnapshotCache()
|
||||||
|
|
||||||
|
self.pagination_lock = ReadWriteLock()
|
||||||
|
|
||||||
|
@defer.inlineCallbacks
def purge_history(self, room_id, event_id):
    """Purge history in a room that is older than the given event.

    The event is looked up in the store and must belong to ``room_id``.
    Its depth is then handed to the store as the purge cut-off, while the
    room's pagination write lock is held.

    Args:
        room_id: The room whose history is being purged.
        event_id: The event marking the purge boundary.

    Returns:
        Deferred: fires once the store call has completed.

    Raises:
        SynapseError: (400) if the event does not belong to ``room_id``.
    """
    boundary_event = yield self.store.get_event(event_id)

    if boundary_event.room_id != room_id:
        raise SynapseError(400, "Event is for wrong room.")

    purge_depth = boundary_event.depth

    # Take the write side of the per-room lock before touching the store;
    # get_messages takes the read side of the same lock.
    write_lock = yield self.pagination_lock.write(room_id)
    with write_lock:
        yield self.store.delete_old_state(room_id, purge_depth)
|
||||||
|
|
||||||
@defer.inlineCallbacks
|
@defer.inlineCallbacks
|
||||||
def get_messages(self, requester, room_id=None, pagin_config=None,
|
def get_messages(self, requester, room_id=None, pagin_config=None,
|
||||||
as_client_event=True):
|
as_client_event=True):
|
||||||
|
@ -85,6 +99,7 @@ class MessageHandler(BaseHandler):
|
||||||
|
|
||||||
source_config = pagin_config.get_source_config("room")
|
source_config = pagin_config.get_source_config("room")
|
||||||
|
|
||||||
|
with (yield self.pagination_lock.read(room_id)):
|
||||||
membership, member_event_id = yield self._check_in_room_or_world_readable(
|
membership, member_event_id = yield self._check_in_room_or_world_readable(
|
||||||
room_id, user_id
|
room_id, user_id
|
||||||
)
|
)
|
||||||
|
@ -95,7 +110,7 @@ class MessageHandler(BaseHandler):
|
||||||
if room_token.topological:
|
if room_token.topological:
|
||||||
max_topo = room_token.topological
|
max_topo = room_token.topological
|
||||||
else:
|
else:
|
||||||
max_topo = yield self.store.get_max_topological_token_for_stream_and_room(
|
max_topo = yield self.store.get_max_topological_token(
|
||||||
room_id, room_token.stream
|
room_id, room_token.stream
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
@ -77,6 +77,24 @@ class PurgeMediaCacheRestServlet(ClientV1RestServlet):
|
||||||
defer.returnValue((200, ret))
|
defer.returnValue((200, ret))
|
||||||
|
|
||||||
|
|
||||||
|
class PurgeHistoryRestServlet(ClientV1RestServlet):
    """Admin endpoint that purges a room's history up to a given event.

    The room ID and the boundary event ID are both taken from the request
    path.
    """

    PATTERNS = client_path_patterns(
        "/admin/purge_history/(?P<room_id>[^/]*)/(?P<event_id>[^/]*)"
    )

    @defer.inlineCallbacks
    def on_POST(self, request, room_id, event_id):
        """Handle a purge request.

        Args:
            request: The incoming HTTP request, used to authenticate.
            room_id: Room ID captured from the path.
            event_id: Event ID captured from the path.

        Returns:
            Deferred: resolves to ``(200, {})`` once the purge has finished.

        Raises:
            AuthError: (403) if the requesting user is not a server admin.
        """
        caller = yield self.auth.get_user_by_req(request)
        caller_is_admin = yield self.auth.is_server_admin(caller.user)

        # Only server admins may purge history.
        if not caller_is_admin:
            raise AuthError(403, "You are not a server admin")

        message_handler = self.handlers.message_handler
        yield message_handler.purge_history(room_id, event_id)

        response = (200, {})
        defer.returnValue(response)
|
||||||
|
|
||||||
|
|
||||||
class DeactivateAccountRestServlet(ClientV1RestServlet):
|
class DeactivateAccountRestServlet(ClientV1RestServlet):
|
||||||
PATTERNS = client_path_patterns("/admin/deactivate/(?P<target_user_id>[^/]*)")
|
PATTERNS = client_path_patterns("/admin/deactivate/(?P<target_user_id>[^/]*)")
|
||||||
|
|
||||||
|
@ -106,3 +124,4 @@ def register_servlets(hs, http_server):
|
||||||
WhoisRestServlet(hs).register(http_server)
|
WhoisRestServlet(hs).register(http_server)
|
||||||
PurgeMediaCacheRestServlet(hs).register(http_server)
|
PurgeMediaCacheRestServlet(hs).register(http_server)
|
||||||
DeactivateAccountRestServlet(hs).register(http_server)
|
DeactivateAccountRestServlet(hs).register(http_server)
|
||||||
|
PurgeHistoryRestServlet(hs).register(http_server)
|
||||||
|
|
|
@ -23,6 +23,7 @@ from synapse.util.async import ObservableDeferred
|
||||||
from synapse.util.logcontext import preserve_fn, PreserveLoggingContext
|
from synapse.util.logcontext import preserve_fn, PreserveLoggingContext
|
||||||
from synapse.util.logutils import log_function
|
from synapse.util.logutils import log_function
|
||||||
from synapse.api.constants import EventTypes
|
from synapse.api.constants import EventTypes
|
||||||
|
from synapse.api.errors import SynapseError
|
||||||
|
|
||||||
from canonicaljson import encode_canonical_json
|
from canonicaljson import encode_canonical_json
|
||||||
from collections import deque, namedtuple
|
from collections import deque, namedtuple
|
||||||
|
@ -1281,6 +1282,156 @@ class EventsStore(SQLBaseStore):
|
||||||
)
|
)
|
||||||
return self.runInteraction("get_all_new_events", get_all_new_events_txn)
|
return self.runInteraction("get_all_new_events", get_all_new_events_txn)
|
||||||
|
|
||||||
|
def delete_old_state(self, room_id, topological_ordering):
    """Run the old-state purge for a room inside a database interaction.

    Args:
        room_id: The room to purge.
        topological_ordering: Cut-off passed through to
            ``_delete_old_state_txn``.

    Returns:
        Whatever ``runInteraction`` returns for the "delete_old_state"
        interaction.
    """
    txn_func = self._delete_old_state_txn
    return self.runInteraction(
        "delete_old_state", txn_func, room_id, topological_ordering
    )
|
||||||
|
|
||||||
|
def _delete_old_state_txn(self, txn, room_id, topological_ordering):
    """Purge a room's events that are older than ``topological_ordering``.

    Events in the room with a topological ordering strictly below the
    cut-off are processed as follows:

    * non-state events whose ID is not local to this server are deleted
      from the event tables;
    * state events, and events whose ID is local, are kept but flagged as
      outliers;
    * state groups referenced only by purged events are deleted;
    * the room's ``min_depth`` is raised to the cut-off;
    * purged events still referenced by a surviving event are recorded as
      backward extremities.

    Args:
        txn: Database transaction/cursor to run the statements on.
        room_id: The room to purge.
        topological_ordering: Events strictly below this value are purged.

    Raises:
        SynapseError: (400) if the room has no forward extremities, or if
            none of its forward extremities is above the cut-off.
    """

    # Tables that should be pruned:
    #     event_auth
    #     event_backward_extremities
    #     event_content_hashes
    #     event_destinations
    #     event_edge_hashes
    #     event_edges
    #     event_forward_extremities
    #     event_json
    #     event_push_actions
    #     event_reference_hashes
    #     event_search
    #     event_signatures
    #     event_to_state_groups
    #     events
    #     rejections
    #     room_depth
    #     state_groups
    #     state_groups_state

    # First ensure that we're not about to delete all the forward
    # extremities.
    txn.execute(
        "SELECT e.event_id, e.depth FROM events as e "
        "INNER JOIN event_forward_extremities as f "
        "ON e.event_id = f.event_id "
        "AND e.room_id = f.room_id "
        "WHERE f.room_id = ?",
        (room_id,)
    )
    rows = txn.fetchall()

    if not rows:
        # max() on an empty sequence would raise a bare ValueError; report
        # the problem as a client error instead.
        raise SynapseError(400, "room has no forward extremities")

    # Rows are (event_id, depth): the check must be made on the depth
    # column, not on the event ID.
    max_depth = max(depth for _, depth in rows)

    if max_depth <= topological_ordering:
        # We need to ensure we don't delete all the events from the
        # database, otherwise we wouldn't be able to send any events (due
        # to not having any forward extremities left).
        raise SynapseError(
            400, "topological_ordering is greater than forward extremeties"
        )

    # Every event below the cut-off; state_key is NULL for non-state events
    # because of the LEFT JOIN.
    txn.execute(
        "SELECT event_id, state_key FROM events"
        " LEFT JOIN state_events USING (room_id, event_id)"
        " WHERE room_id = ? AND topological_ordering < ?",
        (room_id, topological_ordering,)
    )
    event_rows = txn.fetchall()

    # We calculate the new entries for the backward extremities by finding
    # all events that point to events that are to be purged. DISTINCT stops
    # an event referenced by several surviving events from being returned
    # (and then inserted) more than once.
    txn.execute(
        "SELECT DISTINCT e.event_id FROM events as e"
        " INNER JOIN event_edges as ed ON e.event_id = ed.prev_event_id"
        " INNER JOIN events as e2 ON e2.event_id = ed.event_id"
        " WHERE e.room_id = ? AND e.topological_ordering < ?"
        " AND e2.topological_ordering >= ?",
        (room_id, topological_ordering, topological_ordering)
    )
    new_backwards_extrems = txn.fetchall()

    # Get all state groups that are only referenced by events that are
    # to be deleted.
    txn.execute(
        "SELECT state_group FROM event_to_state_groups"
        " INNER JOIN events USING (event_id)"
        " WHERE state_group IN ("
        " SELECT DISTINCT state_group FROM events"
        " INNER JOIN event_to_state_groups USING (event_id)"
        " WHERE room_id = ? AND topological_ordering < ?"
        " )"
        " GROUP BY state_group HAVING MAX(topological_ordering) < ?",
        (room_id, topological_ordering, topological_ordering)
    )
    state_rows = txn.fetchall()

    txn.executemany(
        "DELETE FROM state_groups_state WHERE state_group = ?",
        state_rows
    )
    txn.executemany(
        "DELETE FROM state_groups WHERE id = ?",
        state_rows
    )

    # Drop the event -> state group mapping for every purged event.
    txn.executemany(
        "DELETE FROM event_to_state_groups WHERE event_id = ?",
        [(event_id,) for event_id, _ in event_rows]
    )

    txn.execute(
        "UPDATE room_depth SET min_depth = ? WHERE room_id = ?",
        (topological_ordering, room_id,)
    )

    # Delete all remote non-state events
    to_delete = [
        (event_id,) for event_id, state_key in event_rows
        if state_key is None and not self.hs.is_mine_id(event_id)
    ]
    # "events" is part of this list, so no separate delete from that table
    # is needed afterwards.
    for table in (
        "events",
        "event_json",
        "event_auth",
        "event_content_hashes",
        "event_destinations",
        "event_edge_hashes",
        "event_edges",
        "event_forward_extremities",
        "event_push_actions",
        "event_reference_hashes",
        "event_search",
        "event_signatures",
        "rejections",
        "event_backward_extremities",
    ):
        txn.executemany(
            "DELETE FROM %s WHERE event_id = ?" % (table,),
            to_delete
        )

    # Update backward extremities
    txn.executemany(
        "INSERT INTO event_backward_extremities (room_id, event_id)"
        " VALUES (?, ?)",
        [(room_id, event_id) for event_id, in new_backwards_extrems]
    )

    # Mark all state and own events as outliers
    txn.executemany(
        "UPDATE events SET outlier = ?"
        " WHERE event_id = ?",
        [
            (True, event_id,) for event_id, state_key in event_rows
            if state_key is not None or self.hs.is_mine_id(event_id)
        ]
    )
|
||||||
|
|
||||||
|
|
||||||
AllNewEventsResult = namedtuple("AllNewEventsResult", [
|
AllNewEventsResult = namedtuple("AllNewEventsResult", [
|
||||||
"new_forward_events", "new_backfill_events",
|
"new_forward_events", "new_backfill_events",
|
||||||
|
|
|
@ -510,13 +510,13 @@ class StreamStore(SQLBaseStore):
|
||||||
row["topological_ordering"], row["stream_ordering"],)
|
row["topological_ordering"], row["stream_ordering"],)
|
||||||
)
|
)
|
||||||
|
|
||||||
def get_max_topological_token_for_stream_and_room(self, room_id, stream_key):
|
def get_max_topological_token(self, room_id, stream_key):
|
||||||
sql = (
|
sql = (
|
||||||
"SELECT max(topological_ordering) FROM events"
|
"SELECT max(topological_ordering) FROM events"
|
||||||
" WHERE room_id = ? AND stream_ordering < ?"
|
" WHERE room_id = ? AND stream_ordering < ?"
|
||||||
)
|
)
|
||||||
return self._execute(
|
return self._execute(
|
||||||
"get_max_topological_token_for_stream_and_room", None,
|
"get_max_topological_token", None,
|
||||||
sql, room_id, stream_key,
|
sql, room_id, stream_key,
|
||||||
).addCallback(
|
).addCallback(
|
||||||
lambda r: r[0][0] if r else 0
|
lambda r: r[0][0] if r else 0
|
||||||
|
|
|
@ -194,3 +194,85 @@ class Linearizer(object):
|
||||||
self.key_to_defer.pop(key, None)
|
self.key_to_defer.pop(key, None)
|
||||||
|
|
||||||
defer.returnValue(_ctx_manager())
|
defer.returnValue(_ctx_manager())
|
||||||
|
|
||||||
|
|
||||||
|
class ReadWriteLock(object):
    """A deferred-style read/write lock, keyed by an arbitrary key.

    Both ``read`` and ``write`` return a deferred that resolves to a context
    manager; leaving the ``with`` block releases the lock.

    Example:

        with (yield read_write_lock.read("test_key")):
            # do some work
    """

    # IMPLEMENTATION NOTES
    #
    # For each key we remember the most recently queued writer and the
    # readers queued since then. Each is represented by a deferred that is
    # resolved when its holder releases the lock.
    #
    # Read: it is safe to take a read lock once the latest writer has
    # resolved. The new reader is added to the set of latest readers.
    #
    # Write: it is safe to take the write lock once the latest writer and
    # all of the latest readers have resolved. The new writer then replaces
    # the latest writer.

    def __init__(self):
        # key -> set of release deferreds for the latest queued readers
        self.key_to_current_readers = {}

        # key -> release deferred for the latest queued writer
        self.key_to_current_writer = {}

    @defer.inlineCallbacks
    def read(self, key):
        """Acquire the read side of the lock for ``key``.

        Returns:
            Deferred: resolves to a context manager once the latest queued
            writer (if any) has released the lock.
        """
        release_signal = defer.Deferred()

        readers = self.key_to_current_readers.setdefault(key, set())
        pending_writer = self.key_to_current_writer.get(key, None)

        readers.add(release_signal)

        # Only the latest writer can block us; other readers never do.
        yield pending_writer

        @contextmanager
        def _held():
            try:
                yield
            finally:
                release_signal.callback(None)
                self.key_to_current_readers.get(key, set()).discard(
                    release_signal
                )

        defer.returnValue(_held())

    @defer.inlineCallbacks
    def write(self, key):
        """Acquire the write side of the lock for ``key``.

        Returns:
            Deferred: resolves to a context manager once the latest queued
            writer and all of the latest queued readers have released the
            lock.
        """
        release_signal = defer.Deferred()

        readers = self.key_to_current_readers.get(key, set())
        pending_writer = self.key_to_current_writer.get(key, None)

        # Everything queued ahead of us has to finish first.
        blockers = list(readers)
        if pending_writer:
            blockers.append(pending_writer)

        # This writer now waits on those readers, so anything queued after
        # it only needs to wait on this writer.
        readers.clear()
        self.key_to_current_writer[key] = release_signal

        yield defer.gatherResults(blockers)

        @contextmanager
        def _held():
            try:
                yield
            finally:
                release_signal.callback(None)
                # Only forget the writer if no later writer has replaced it.
                if self.key_to_current_writer[key] == release_signal:
                    self.key_to_current_writer.pop(key)

        defer.returnValue(_held())
|
||||||
|
|
85
tests/util/test_rwlock.py
Normal file
85
tests/util/test_rwlock.py
Normal file
|
@ -0,0 +1,85 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# Copyright 2016 OpenMarket Ltd
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
|
||||||
|
from tests import unittest
|
||||||
|
|
||||||
|
from synapse.util.async import ReadWriteLock
|
||||||
|
|
||||||
|
|
||||||
|
class ReadWriteLockTestCase(unittest.TestCase):
    """Checks the order in which queued readers and writers get the lock."""

    def _assert_called_before_not_after(self, lst, first_false):
        """Assert that deferreds before ``first_false`` have fired and that
        those from ``first_false`` onwards have not.
        """
        fired = lst[:first_false]
        pending = lst[first_false:]

        for idx, d in enumerate(fired):
            self.assertTrue(d.called, msg="%d was unexpectedly false" % idx)

        for idx, d in enumerate(pending, first_false):
            self.assertFalse(
                d.called, msg="%d was unexpectedly true" % idx
            )

    def test_rwlock(self):
        rwlock = ReadWriteLock()

        key = object()

        ds = [
            rwlock.read(key),   # 0
            rwlock.read(key),   # 1
            rwlock.write(key),  # 2
            rwlock.write(key),  # 3
            rwlock.read(key),   # 4
            rwlock.read(key),   # 5
            rwlock.write(key),  # 6
        ]

        # Only the two leading readers hold the lock to begin with.
        self._assert_called_before_not_after(ds, 2)

        # (index of the holder to release,
        #  number of acquired locks while it is still held,
        #  number of acquired locks once it has been released)
        steps = (
            (0, 2, 2),
            (1, 2, 3),
            (2, 3, 4),
            (3, 4, 6),
            (5, 6, 6),
            (4, 6, 7),
        )
        for holder, while_held, once_released in steps:
            with ds[holder].result:
                self._assert_called_before_not_after(ds, while_held)
            self._assert_called_before_not_after(ds, once_released)

        with ds[6].result:
            pass

        # With nothing queued, both kinds of lock are granted immediately.
        for acquire in (rwlock.write, rwlock.read):
            d = acquire(key)
            self.assertTrue(d.called)
            with d.result:
                pass
|
Loading…
Reference in a new issue