mirror of
https://github.com/element-hq/synapse.git
synced 2024-11-22 01:25:44 +03:00
Handle hyphens in user dir search properly (#17254)
Some checks are pending
Build docker images / build (push) Waiting to run
Deploy the documentation / Calculate variables for GitHub Pages deployment (push) Waiting to run
Deploy the documentation / GitHub Pages (push) Blocked by required conditions
Build release artifacts / Calculate list of debian distros (push) Waiting to run
Build release artifacts / Build .deb packages (push) Blocked by required conditions
Build release artifacts / Build wheels on ${{ matrix.os }} for ${{ matrix.arch }} (aarch64, ${{ startsWith(github.ref, 'refs/pull/') }}, ubuntu-20.04) (push) Waiting to run
Build release artifacts / Build wheels on ${{ matrix.os }} for ${{ matrix.arch }} (x86_64, ${{ startsWith(github.ref, 'refs/pull/') }}, macos-11) (push) Waiting to run
Build release artifacts / Build wheels on ${{ matrix.os }} for ${{ matrix.arch }} (x86_64, ${{ startsWith(github.ref, 'refs/pull/') }}, ubuntu-20.04) (push) Waiting to run
Build release artifacts / Build sdist (push) Waiting to run
Build release artifacts / Attach assets to release (push) Blocked by required conditions
Tests / changes (push) Waiting to run
Tests / check-sampleconfig (push) Blocked by required conditions
Tests / check-schema-delta (push) Blocked by required conditions
Tests / check-lockfile (push) Waiting to run
Tests / lint (push) Blocked by required conditions
Tests / calculate-test-jobs (push) Blocked by required conditions
Tests / Typechecking (push) Blocked by required conditions
Tests / lint-crlf (push) Waiting to run
Tests / lint-newsfile (push) Waiting to run
Tests / lint-pydantic (push) Blocked by required conditions
Tests / lint-clippy (push) Blocked by required conditions
Tests / lint-clippy-nightly (push) Blocked by required conditions
Tests / lint-rustfmt (push) Blocked by required conditions
Tests / linting-done (push) Blocked by required conditions
Tests / trial (push) Blocked by required conditions
Tests / trial-olddeps (push) Blocked by required conditions
Tests / trial-pypy (all, pypy-3.8) (push) Blocked by required conditions
Tests / sytest (push) Blocked by required conditions
Tests / export-data (push) Blocked by required conditions
Tests / portdb (11, 3.8) (push) Blocked by required conditions
Tests / portdb (15, 3.11) (push) Blocked by required conditions
Tests / complement (monolith, Postgres) (push) Blocked by required conditions
Tests / complement (monolith, SQLite) (push) Blocked by required conditions
Tests / complement (workers, Postgres) (push) Blocked by required conditions
Tests / cargo-test (push) Blocked by required conditions
Tests / cargo-bench (push) Blocked by required conditions
Tests / tests-done (push) Blocked by required conditions
Some checks are pending
Build docker images / build (push) Waiting to run
Deploy the documentation / Calculate variables for GitHub Pages deployment (push) Waiting to run
Deploy the documentation / GitHub Pages (push) Blocked by required conditions
Build release artifacts / Calculate list of debian distros (push) Waiting to run
Build release artifacts / Build .deb packages (push) Blocked by required conditions
Build release artifacts / Build wheels on ${{ matrix.os }} for ${{ matrix.arch }} (aarch64, ${{ startsWith(github.ref, 'refs/pull/') }}, ubuntu-20.04) (push) Waiting to run
Build release artifacts / Build wheels on ${{ matrix.os }} for ${{ matrix.arch }} (x86_64, ${{ startsWith(github.ref, 'refs/pull/') }}, macos-11) (push) Waiting to run
Build release artifacts / Build wheels on ${{ matrix.os }} for ${{ matrix.arch }} (x86_64, ${{ startsWith(github.ref, 'refs/pull/') }}, ubuntu-20.04) (push) Waiting to run
Build release artifacts / Build sdist (push) Waiting to run
Build release artifacts / Attach assets to release (push) Blocked by required conditions
Tests / changes (push) Waiting to run
Tests / check-sampleconfig (push) Blocked by required conditions
Tests / check-schema-delta (push) Blocked by required conditions
Tests / check-lockfile (push) Waiting to run
Tests / lint (push) Blocked by required conditions
Tests / calculate-test-jobs (push) Blocked by required conditions
Tests / Typechecking (push) Blocked by required conditions
Tests / lint-crlf (push) Waiting to run
Tests / lint-newsfile (push) Waiting to run
Tests / lint-pydantic (push) Blocked by required conditions
Tests / lint-clippy (push) Blocked by required conditions
Tests / lint-clippy-nightly (push) Blocked by required conditions
Tests / lint-rustfmt (push) Blocked by required conditions
Tests / linting-done (push) Blocked by required conditions
Tests / trial (push) Blocked by required conditions
Tests / trial-olddeps (push) Blocked by required conditions
Tests / trial-pypy (all, pypy-3.8) (push) Blocked by required conditions
Tests / sytest (push) Blocked by required conditions
Tests / export-data (push) Blocked by required conditions
Tests / portdb (11, 3.8) (push) Blocked by required conditions
Tests / portdb (15, 3.11) (push) Blocked by required conditions
Tests / complement (monolith, Postgres) (push) Blocked by required conditions
Tests / complement (monolith, SQLite) (push) Blocked by required conditions
Tests / complement (workers, Postgres) (push) Blocked by required conditions
Tests / cargo-test (push) Blocked by required conditions
Tests / cargo-bench (push) Blocked by required conditions
Tests / tests-done (push) Blocked by required conditions
c.f. #16675
This commit is contained in:
parent
7d8f0ef351
commit
aabf577166
4 changed files with 104 additions and 6 deletions
1
changelog.d/17254.bugfix
Normal file
1
changelog.d/17254.bugfix
Normal file
|
@ -0,0 +1 @@
|
||||||
|
Fix searching for users with their exact localpart whose ID includes a hyphen.
|
|
@ -1281,7 +1281,7 @@ def _parse_words_with_regex(search_term: str) -> List[str]:
|
||||||
Break down search term into words, when we don't have ICU available.
|
Break down search term into words, when we don't have ICU available.
|
||||||
See: `_parse_words`
|
See: `_parse_words`
|
||||||
"""
|
"""
|
||||||
return re.findall(r"([\w\-]+)", search_term, re.UNICODE)
|
return re.findall(r"([\w-]+)", search_term, re.UNICODE)
|
||||||
|
|
||||||
|
|
||||||
def _parse_words_with_icu(search_term: str) -> List[str]:
|
def _parse_words_with_icu(search_term: str) -> List[str]:
|
||||||
|
@ -1303,15 +1303,69 @@ def _parse_words_with_icu(search_term: str) -> List[str]:
|
||||||
if j < 0:
|
if j < 0:
|
||||||
break
|
break
|
||||||
|
|
||||||
result = search_term[i:j]
|
# We want to make sure that we split on `@` and `:` specifically, as
|
||||||
|
# they occur in user IDs.
|
||||||
|
for result in re.split(r"[@:]+", search_term[i:j]):
|
||||||
|
results.append(result.strip())
|
||||||
|
|
||||||
|
i = j
|
||||||
|
|
||||||
|
# libicu will break up words that have punctuation in them, but to handle
|
||||||
|
# cases where user IDs have '-', '.' and '_' in them we want to *not* break
|
||||||
|
# those into words and instead allow the DB to tokenise them how it wants.
|
||||||
|
#
|
||||||
|
# In particular, user-71 in postgres gets tokenised to "user, -71", and this
|
||||||
|
# will not match a query for "user, 71".
|
||||||
|
new_results: List[str] = []
|
||||||
|
i = 0
|
||||||
|
while i < len(results):
|
||||||
|
curr = results[i]
|
||||||
|
|
||||||
|
prev = None
|
||||||
|
next = None
|
||||||
|
if i > 0:
|
||||||
|
prev = results[i - 1]
|
||||||
|
if i + 1 < len(results):
|
||||||
|
next = results[i + 1]
|
||||||
|
|
||||||
|
i += 1
|
||||||
|
|
||||||
# libicu considers spaces and punctuation between words as words, but we don't
|
# libicu considers spaces and punctuation between words as words, but we don't
|
||||||
# want to include those in results as they would result in syntax errors in SQL
|
# want to include those in results as they would result in syntax errors in SQL
|
||||||
# queries (e.g. "foo bar" would result in the search query including "foo & &
|
# queries (e.g. "foo bar" would result in the search query including "foo & &
|
||||||
# bar").
|
# bar").
|
||||||
if len(re.findall(r"([\w\-]+)", result, re.UNICODE)):
|
if not curr:
|
||||||
results.append(result)
|
continue
|
||||||
|
|
||||||
i = j
|
if curr in ["-", ".", "_"]:
|
||||||
|
prefix = ""
|
||||||
|
suffix = ""
|
||||||
|
|
||||||
return results
|
# Check if the next item is a word, and if so use it as the suffix.
|
||||||
|
# We check whether it's a word, as we don't want to concatenate
|
||||||
|
# multiple punctuation marks.
|
||||||
|
if next is not None and re.match(r"\w", next):
|
||||||
|
suffix = next
|
||||||
|
i += 1 # We're using next, so we skip it in the outer loop.
|
||||||
|
else:
|
||||||
|
# We want to avoid creating terms like "user-", as we should
|
||||||
|
# strip trailing punctuation.
|
||||||
|
continue
|
||||||
|
|
||||||
|
if prev and re.match(r"\w", prev) and new_results:
|
||||||
|
prefix = new_results[-1]
|
||||||
|
new_results.pop()
|
||||||
|
|
||||||
|
# We might not have a prefix here, but that's fine as we want to
|
||||||
|
# ensure that we don't strip preceding punctuation e.g. '-71'
|
||||||
|
# shouldn't be converted to '71'.
|
||||||
|
|
||||||
|
new_results.append(f"{prefix}{curr}{suffix}")
|
||||||
|
continue
|
||||||
|
elif not re.match(r"\w", curr):
|
||||||
|
# Ignore other punctuation
|
||||||
|
continue
|
||||||
|
|
||||||
|
new_results.append(curr)
|
||||||
|
|
||||||
|
return new_results
|
||||||
|
|
|
@ -1061,6 +1061,45 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase):
|
||||||
{alice: ProfileInfo(display_name=None, avatar_url=MXC_DUMMY)},
|
{alice: ProfileInfo(display_name=None, avatar_url=MXC_DUMMY)},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def test_search_punctuation(self) -> None:
|
||||||
|
"""Test that you can search for a user that includes punctuation"""
|
||||||
|
|
||||||
|
searching_user = self.register_user("searcher", "password")
|
||||||
|
searching_user_tok = self.login("searcher", "password")
|
||||||
|
|
||||||
|
room_id = self.helper.create_room_as(
|
||||||
|
searching_user,
|
||||||
|
room_version=RoomVersions.V1.identifier,
|
||||||
|
tok=searching_user_tok,
|
||||||
|
)
|
||||||
|
|
||||||
|
# We want to test searching for users of the form e.g. "user-1", with
|
||||||
|
# various punctuation. We also test both where the prefix is numeric and
|
||||||
|
# alphanumeric, as e.g. postgres tokenises "user-1" as "user" and "-1".
|
||||||
|
i = 1
|
||||||
|
for char in ["-", ".", "_"]:
|
||||||
|
for use_numeric in [False, True]:
|
||||||
|
if use_numeric:
|
||||||
|
prefix1 = f"{i}"
|
||||||
|
prefix2 = f"{i+1}"
|
||||||
|
else:
|
||||||
|
prefix1 = f"a{i}"
|
||||||
|
prefix2 = f"a{i+1}"
|
||||||
|
|
||||||
|
local_user_1 = self.register_user(f"user{char}{prefix1}", "password")
|
||||||
|
local_user_2 = self.register_user(f"user{char}{prefix2}", "password")
|
||||||
|
|
||||||
|
self._add_user_to_room(room_id, RoomVersions.V1, local_user_1)
|
||||||
|
self._add_user_to_room(room_id, RoomVersions.V1, local_user_2)
|
||||||
|
|
||||||
|
results = self.get_success(
|
||||||
|
self.handler.search_users(searching_user, local_user_1, 20)
|
||||||
|
)["results"]
|
||||||
|
received_user_id_ordering = [result["user_id"] for result in results]
|
||||||
|
self.assertSequenceEqual(received_user_id_ordering[:1], [local_user_1])
|
||||||
|
|
||||||
|
i += 2
|
||||||
|
|
||||||
|
|
||||||
class TestUserDirSearchDisabled(unittest.HomeserverTestCase):
|
class TestUserDirSearchDisabled(unittest.HomeserverTestCase):
|
||||||
servlets = [
|
servlets = [
|
||||||
|
|
|
@ -711,6 +711,10 @@ class UserDirectoryICUTestCase(HomeserverTestCase):
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
self.assertEqual(_parse_words_with_icu("user-1"), ["user-1"])
|
||||||
|
self.assertEqual(_parse_words_with_icu("user-ab"), ["user-ab"])
|
||||||
|
self.assertEqual(_parse_words_with_icu("user.--1"), ["user", "-1"])
|
||||||
|
|
||||||
def test_regex_word_boundary_punctuation(self) -> None:
|
def test_regex_word_boundary_punctuation(self) -> None:
|
||||||
"""
|
"""
|
||||||
Tests the behaviour of punctuation with the non-ICU tokeniser
|
Tests the behaviour of punctuation with the non-ICU tokeniser
|
||||||
|
|
Loading…
Reference in a new issue