Add comments

This commit is contained in:
Erik Johnston 2017-05-31 15:00:29 +01:00
parent 293ef29655
commit 63fda37e20
4 changed files with 177 additions and 49 deletions

View file

@ -26,25 +26,54 @@ logger = logging.getLogger(__name__)
class UserDirectoyHandler(object):
"""Handles querying of and keeping updated the user_directory.
N.B.: ASSUMES IT IS THE ONLY THING THAT MODIFIES THE USER DIRECTORY
"""
def __init__(self, hs):
self.store = hs.get_datastore()
self.state = hs.get_state_handler()
self.server_name = hs.hostname
self.clock = hs.get_clock()
# When start up for the first time we need to populate the user_directory.
# This is a set of user_id's we've inserted already
self.initially_handled_users = set()
# The current position in the current_state_delta stream
self.pos = None
# Guard to ensure we only process deltas one at a time
self._is_processing = False
# We kick this off so that we don't have to wait for a change before
# we start populating the user directory
self.clock.call_later(0, self.notify_new_event)
def search_users(self, search_term, limit):
"""Searches for users in directory
Returns:
dict of the form::
{
"limited": <bool>, # whether there were more results or not
"results": [ # Ordered by best match first
{
"user_id": <user_id>,
"display_name": <display_name>,
"avatar_url": <avatar_url>
}
]
}
"""
return self.store.search_user_dir(search_term, limit)
@defer.inlineCallbacks
def notify_new_event(self):
"""Called when there may be more deltas to process
"""
if self._is_processing:
return
@ -56,13 +85,16 @@ class UserDirectoyHandler(object):
@defer.inlineCallbacks
def _unsafe_process(self):
# If self.pos is None then means we haven't fetched it from DB
if self.pos is None:
self.pos = yield self.store.get_user_directory_stream_pos()
# If still None then we need to do the initial fill of directory
if self.pos is None:
yield self._do_initial_spam()
self.pos = yield self.store.get_user_directory_stream_pos()
# Loop round handling deltas until we're up to date
while True:
with Measure(self.clock, "user_dir_delta"):
deltas = yield self.store.get_current_state_deltas(self.pos)
@ -75,7 +107,31 @@ class UserDirectoyHandler(object):
yield self.store.update_user_directory_stream_pos(self.pos)
@defer.inlineCallbacks
def _handle_room(self, room_id):
def _do_initial_spam(self):
"""Populates the user_directory from the current state of the DB, used
when synapse first starts with user_directory support
"""
# TODO: pull from current delta stream_id
new_pos = self.store.get_room_max_stream_ordering()
# Delete any existing entries just in case there are any
yield self.store.delete_all_from_user_dir()
# We process by going through each existing room at a time.
room_ids = yield self.store.get_all_rooms()
for room_id in room_ids:
yield self._handle_intial_room(room_id)
self.initially_handled_users = None
yield self.store.update_user_directory_stream_pos(new_pos)
@defer.inlineCallbacks
def _handle_intial_room(self, room_id):
"""Called when we initially fill out user_directory one room at a time
"""
# TODO: Check we're still joined to room
is_public = yield self.store.is_room_world_readable_or_publicly_joinable(room_id)
@ -93,50 +149,10 @@ class UserDirectoyHandler(object):
self.initially_handled_users |= unhandled_users
@defer.inlineCallbacks
def _do_initial_spam(self):
# TODO: pull from current delta stream_id
new_pos = self.store.get_room_max_stream_ordering()
yield self.store.delete_all_from_user_dir()
room_ids = yield self.store.get_all_rooms()
for room_id in room_ids:
yield self._handle_room(room_id)
self.initially_handled_users = None
yield self.store.update_user_directory_stream_pos(new_pos)
@defer.inlineCallbacks
def _handle_new_user(self, room_id, user_id, profile):
row = yield self.store.get_user_in_directory(user_id)
if row:
return
yield self.store.add_profiles_to_user_dir(room_id, {user_id: profile})
def _handle_remove_user(self, room_id, user_id):
row = yield self.store.get_user_in_directory(user_id)
if not row or row["room_id"] != room_id:
return
# TODO: Make this faster?
rooms = yield self.store.get_rooms_for_user(user_id)
for j_room_id in rooms:
is_public = yield self.store.is_room_world_readable_or_publicly_joinable(
j_room_id
)
if is_public:
yield self.store.update_user_in_user_dir(user_id, j_room_id)
return
yield self.store.remove_from_user_dir(user_id)
@defer.inlineCallbacks
def _handle_deltas(self, deltas):
"""Called with the state deltas to process
"""
for delta in deltas:
typ = delta["type"]
state_key = delta["state_key"]
@ -144,22 +160,33 @@ class UserDirectoyHandler(object):
event_id = delta["event_id"]
prev_event_id = delta["prev_event_id"]
# For join rule and visibility changes we need to check if the room
# may have become public or not and add/remove the users in said room
if typ == EventTypes.RoomHistoryVisibility:
change = yield self._get_key_change(
prev_event_id, event_id,
key_name="history_visibility",
public_value="world_readable",
)
# If change is None, no change. True => become world readable,
# False => was world readable
if change is None:
continue
# There's been a change to or from being world readable.
is_public = yield self.store.is_room_world_readable_or_publicly_joinable(
room_id
)
if change and is_public:
if change and not is_public:
# If we became world readable but room isn't currently public then
# we ignore the change
continue
elif not change and not is_public:
elif not change and is_public:
# If we stopped being world readable but are still public,
# ignore the change
continue
users_with_profile = yield self.state.get_current_user_in_room(room_id)
@ -213,8 +240,60 @@ class UserDirectoyHandler(object):
else:
yield self._handle_remove_user(room_id, state_key)
@defer.inlineCallbacks
def _handle_new_user(self, room_id, user_id, profile):
"""Called when we might need to add user to directory
Args:
room_id (str): room_id that user joined or started being public that
user_id (str)
"""
row = yield self.store.get_user_in_directory(user_id)
if row:
return
yield self.store.add_profiles_to_user_dir(room_id, {user_id: profile})
def _handle_remove_user(self, room_id, user_id):
"""Called when we might need to remove user to directory
Args:
room_id (str): room_id that user left or stopped being public that
user_id (str)
"""
row = yield self.store.get_user_in_directory(user_id)
if not row or row["room_id"] != room_id:
# Either the user wasn't in directory or we're still in a room that
# is public (i.e. the room_id in the database)
return
# TODO: Make this faster?
rooms = yield self.store.get_rooms_for_user(user_id)
for j_room_id in rooms:
is_public = yield self.store.is_room_world_readable_or_publicly_joinable(
j_room_id
)
if is_public:
yield self.store.update_user_in_user_dir(user_id, j_room_id)
return
yield self.store.remove_from_user_dir(user_id)
@defer.inlineCallbacks
def _get_key_change(self, prev_event_id, event_id, key_name, public_value):
"""Given two events check if the `key_name` field in content changed
from not matching `public_value` to doing so.
For example, check if `history_visibility` (`key_name`) changed from
`shared` to `world_readable` (`public_value`).
Returns:
None if the field in the events either both match `public_value` o
neither do, i.e. there has been no change.
True if it didnt match `public_value` but now does
Falsse if it did match `public_value` but now doesn't
"""
prev_event = None
event = None
if prev_event_id:

View file

@ -39,6 +39,22 @@ class UserDirectorySearchRestServlet(RestServlet):
@defer.inlineCallbacks
def on_POST(self, request):
"""Searches for users in directory
Returns:
dict of the form::
{
"limited": <bool>, # whether there were more results or not
"results": [ # Ordered by best match first
{
"user_id": <user_id>,
"display_name": <display_name>,
"avatar_url": <avatar_url>
}
]
}
"""
yield self.auth.get_user_by_req(request, allow_guest=False)
body = parse_json_object_from_request(request)

View file

@ -34,7 +34,7 @@ INSERT INTO user_directory_stream_pos (stream_id) VALUES (null);
POSTGRES_TABLE = """
CREATE TABLE user_directory (
user_id TEXT NOT NULL,
room_id TEXT NOT NULL,
room_id TEXT NOT NULL, -- A room_id that we know is public
display_name TEXT,
avatar_url TEXT,
vector tsvector

View file

@ -26,6 +26,8 @@ class UserDirectoryStore(SQLBaseStore):
@cachedInlineCallbacks(cache_context=True)
def is_room_world_readable_or_publicly_joinable(self, room_id, cache_context):
"""Check if the room is either world_readable or publically joinable
"""
current_state_ids = yield self.get_current_state_ids(
room_id, on_invalidate=cache_context.invalidate
)
@ -47,14 +49,24 @@ class UserDirectoryStore(SQLBaseStore):
defer.returnValue(False)
def add_profiles_to_user_dir(self, room_id, users_with_profile):
"""Add profiles to the user directory
Args:
room_id (str): A room_id that all users are in that is world_readable
or publically joinable
users_with_profile (dict): Users to add to directory in the form of
mapping of user_id -> ProfileInfo
"""
if isinstance(self.database_engine, PostgresEngine):
# We weight the loclpart most highly, then display name and finally
# server name
sql = """
INSERT INTO user_directory
(user_id, room_id, display_name, avatar_url, vector)
VALUES (?,?,?,?,
setweight(to_tsvector('english', ?), 'A')
|| to_tsvector('english', ?)
|| to_tsvector('english', COALESCE(?, ''))
|| setweight(to_tsvector('english', ?), 'D')
|| setweight(to_tsvector('english', COALESCE(?, '')), 'B')
)
"""
args = (
@ -113,6 +125,8 @@ class UserDirectoryStore(SQLBaseStore):
self.get_user_in_directory.invalidate((user_id,))
def get_all_rooms(self):
"""Get all room_ids we've ever known about
"""
return self._simple_select_onecol(
table="current_state_events",
keyvalues={},
@ -121,6 +135,8 @@ class UserDirectoryStore(SQLBaseStore):
)
def delete_all_from_user_dir(self):
"""Delete the entire user directory
"""
def _delete_all_from_user_dir_txn(txn):
txn.execute("DELETE FROM user_directory")
txn.call_after(self.get_user_in_directory.invalidate_all)
@ -170,12 +186,29 @@ class UserDirectoryStore(SQLBaseStore):
@defer.inlineCallbacks
def search_user_dir(self, search_term, limit):
"""Searches for users in directory
Returns:
dict of the form::
{
"limited": <bool>, # whether there were more results or not
"results": [ # Ordered by best match first
{
"user_id": <user_id>,
"display_name": <display_name>,
"avatar_url": <avatar_url>
}
]
}
"""
if isinstance(self.database_engine, PostgresEngine):
sql = """
SELECT user_id, display_name, avatar_url
FROM user_directory
WHERE vector @@ plainto_tsquery('english', ?)
ORDER BY ts_rank_cd(vector, plainto_tsquery('english', ?)) DESC
ORDER BY ts_rank_cd(vector, plainto_tsquery('english', ?)) DESC
LIMIT ?
"""
args = (search_term, search_term, limit + 1,)