[#3213] Fixed the hashtags.name lookup (the compared value must be cast to the citext type for the query to use an index scan). Fixed the embedded hashtags lookup (tags are now lowercased before matching) and adjusted the tests.

This commit is contained in:
Ivan Tashkinov 2021-02-15 21:13:14 +03:00
parent 349b8b0f4f
commit 1dac7d1462
3 changed files with 31 additions and 17 deletions

View file

@ -22,7 +22,9 @@ defmodule Pleroma.Hashtag do
end
# Looks up a single hashtag by name.
# The query lowercases `name` and compares it against the `name` column with the
# bound parameter cast to citext; whatever `Repo.one()` yields is returned.
def get_by_name(name) do
# NOTE(review): this plain `Repo.get_by` call appears to be the pre-change line of
# the diff (the +/- markers were stripped from this view) — confirm against the commit.
Repo.get_by(Hashtag, name: name)
from(h in Hashtag)
|> where([h], fragment("name = ?::citext", ^String.downcase(name)))
|> Repo.one()
end
def get_or_create_by_name(name) when is_bitstring(name) do
@ -37,6 +39,7 @@ def get_or_create_by_name(name) when is_bitstring(name) do
end
def get_or_create_by_names(names) when is_list(names) do
names = Enum.map(names, &String.downcase/1)
timestamp = NaiveDateTime.truncate(NaiveDateTime.utc_now(), :second)
structs =
@ -52,7 +55,8 @@ def get_or_create_by_names(names) when is_list(names) do
Multi.new()
|> Multi.insert_all(:insert_all_op, Hashtag, structs, on_conflict: :nothing)
|> Multi.run(:query_op, fn _repo, _changes ->
{:ok, Repo.all(from(ht in Hashtag, where: ht.name in ^names))}
{:ok,
Repo.all(from(ht in Hashtag, where: ht.name in fragment("?::citext[]", ^names)))}
end)
|> Repo.transaction() do
{:ok, hashtags}

View file

@ -698,6 +698,8 @@ defp restrict_embedded_tag_all(_query, %{tag_all: _tag_all, skip_preload: true})
end
defp restrict_embedded_tag_all(query, %{tag_all: [_ | _] = tag_all}) do
tag_all = Enum.map(tag_all, &String.downcase/1)
from(
[_activity, object] in query,
where: fragment("(?)->'tag' \\?& (?)", object.data, ^tag_all)
@ -714,10 +716,12 @@ defp restrict_embedded_tag_any(_query, %{tag: _tag, skip_preload: true}) do
raise_on_missing_preload()
end
# Restricts `query` using the `?|` operator on the object's `data->'tag'` value
# and a non-empty list of requested tags.
# NOTE(review): the first head below (binding the list as `tag`) appears to be the
# pre-change line of the diff; the second head (binding `tag_any`) appears to be its
# replacement — confirm against the commit, since the +/- markers are missing here.
defp restrict_embedded_tag_any(query, %{tag: [_ | _] = tag}) do
defp restrict_embedded_tag_any(query, %{tag: [_ | _] = tag_any}) do
# Lowercase every requested tag before it is handed to the query.
tag_any = Enum.map(tag_any, &String.downcase/1)
from(
[_activity, object] in query,
# NOTE(review): presumably the pre-change filter (it pins the un-lowercased `tag`) — confirm.
where: fragment("(?)->'tag' \\?| (?)", object.data, ^tag)
# Filter that pins the lowercased `tag_any` list.
where: fragment("(?)->'tag' \\?| (?)", object.data, ^tag_any)
)
end
@ -732,6 +736,8 @@ defp restrict_embedded_tag_reject_any(_query, %{tag_reject: _tag_reject, skip_pr
end
defp restrict_embedded_tag_reject_any(query, %{tag_reject: [_ | _] = tag_reject}) do
tag_reject = Enum.map(tag_reject, &String.downcase/1)
from(
[_activity, object] in query,
where: fragment("not (?)->'tag' \\?| (?)", object.data, ^tag_reject)
@ -749,6 +755,10 @@ defp restrict_hashtag_all(_query, %{tag_all: _tag, skip_preload: true}) do
raise_on_missing_preload()
end
# Single-element `tag_all` list: delegates to `restrict_hashtag_any/2`, passing the
# lone tag under the `:tag` key.
defp restrict_hashtag_all(query, %{tag_all: [single_tag]}) do
restrict_hashtag_any(query, %{tag: single_tag})
end
defp restrict_hashtag_all(query, %{tag_all: [_ | _] = tags}) do
from(
[_activity, object] in query,
@ -756,7 +766,7 @@ defp restrict_hashtag_all(query, %{tag_all: [_ | _] = tags}) do
fragment(
"""
(SELECT array_agg(hashtags.name) FROM hashtags JOIN hashtags_objects
ON hashtags_objects.hashtag_id = hashtags.id WHERE hashtags.name = ANY(?)
ON hashtags_objects.hashtag_id = hashtags.id WHERE hashtags.name = ANY(?::citext[])
AND hashtags_objects.object_id = ?) @> ?
""",
^tags,
@ -767,7 +777,7 @@ defp restrict_hashtag_all(query, %{tag_all: [_ | _] = tags}) do
end
# Clause for a `tag_all` option given as a single binary rather than a list.
defp restrict_hashtag_all(query, %{tag_all: tag}) when is_binary(tag) do
# NOTE(review): this direct call to `restrict_hashtag_any/2` appears to be the
# pre-change line of the diff (markers stripped) — confirm against the commit.
restrict_hashtag_any(query, %{tag: tag})
# Wraps the binary in a one-element list and re-dispatches to `restrict_hashtag_all/2`.
restrict_hashtag_all(query, %{tag_all: [tag]})
end
# Fallback clause: for any other options the query is returned unchanged.
defp restrict_hashtag_all(query, _), do: query
@ -783,7 +793,7 @@ defp restrict_hashtag_any(query, %{tag: [_ | _] = tags}) do
fragment(
"""
EXISTS (SELECT 1 FROM hashtags JOIN hashtags_objects
ON hashtags_objects.hashtag_id = hashtags.id WHERE hashtags.name = ANY(?)
ON hashtags_objects.hashtag_id = hashtags.id WHERE hashtags.name = ANY(?::citext[])
AND hashtags_objects.object_id = ? LIMIT 1)
""",
^tags,
@ -809,7 +819,7 @@ defp restrict_hashtag_reject_any(query, %{tag_reject: [_ | _] = tags_reject}) do
fragment(
"""
NOT EXISTS (SELECT 1 FROM hashtags JOIN hashtags_objects
ON hashtags_objects.hashtag_id = hashtags.id WHERE hashtags.name = ANY(?)
ON hashtags_objects.hashtag_id = hashtags.id WHERE hashtags.name = ANY(?::citext[])
AND hashtags_objects.object_id = ? LIMIT 1)
""",
^tags_reject,

View file

@ -213,24 +213,24 @@ test "works for guppe actors" do
test "it fetches the appropriate tag-restricted posts" do
user = insert(:user)
{:ok, status_one} = CommonAPI.post(user, %{status: ". #test"})
{:ok, status_one} = CommonAPI.post(user, %{status: ". #TEST"})
{:ok, status_two} = CommonAPI.post(user, %{status: ". #essais"})
{:ok, status_three} = CommonAPI.post(user, %{status: ". #test #reject"})
{:ok, status_three} = CommonAPI.post(user, %{status: ". #test #Reject"})
{:ok, status_four} = CommonAPI.post(user, %{status: ". #any1 #any2"})
{:ok, status_five} = CommonAPI.post(user, %{status: ". #any2 #any1"})
{:ok, status_four} = CommonAPI.post(user, %{status: ". #Any1 #any2"})
{:ok, status_five} = CommonAPI.post(user, %{status: ". #Any2 #any1"})
for hashtag_timeline_strategy <- [true, false] do
clear_config([:database, :improved_hashtag_timeline], hashtag_timeline_strategy)
fetch_one = ActivityPub.fetch_activities([], %{type: "Create", tag: "test"})
fetch_two = ActivityPub.fetch_activities([], %{type: "Create", tag: ["test", "essais"]})
fetch_two = ActivityPub.fetch_activities([], %{type: "Create", tag: ["TEST", "essais"]})
fetch_three =
ActivityPub.fetch_activities([], %{
type: "Create",
tag: ["test", "essais"],
tag: ["test", "Essais"],
tag_reject: ["reject"]
})
@ -238,21 +238,21 @@ test "it fetches the appropriate tag-restricted posts" do
ActivityPub.fetch_activities([], %{
type: "Create",
tag: ["test"],
tag_all: ["test", "reject"]
tag_all: ["test", "REJECT"]
})
# Testing that deduplication (if needed) is done on DB (not Ecto) level; :limit is important
fetch_five =
ActivityPub.fetch_activities([], %{
type: "Create",
tag: ["any1", "any2"],
tag: ["ANY1", "any2"],
limit: 2
})
fetch_six =
ActivityPub.fetch_activities([], %{
type: "Create",
tag: ["any1", "any2"],
tag: ["any1", "Any2"],
tag_all: [],
tag_reject: []
})