Merge branch 'oban/rich-media-hardening' into 'develop'

Harden Rich Media parsing against very slow or malicious URLs

See merge request pleroma/pleroma!4192
This commit is contained in:
feld 2024-07-24 20:58:31 +00:00
commit 700c106680
9 changed files with 61 additions and 26 deletions

View file

@ -0,0 +1 @@
Harden Rich Media parsing against very slow or malicious URLs

View file

@ -448,7 +448,7 @@
Pleroma.Web.RichMedia.Parsers.TwitterCard,
Pleroma.Web.RichMedia.Parsers.OEmbed
],
failure_backoff: 60_000,
timeout: 5_000,
ttl_setters: [
Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl,
Pleroma.Web.RichMedia.Parser.TTL.Opengraph
@ -580,6 +580,8 @@
],
email_blacklist: []
# The Pruner :max_age must be longer than Worker :unique
# value or it cannot enforce uniqueness.
config :pleroma, Oban,
repo: Pleroma.Repo,
log: false,
@ -593,7 +595,7 @@
search_indexing: [limit: 10, paused: true],
slow: 5
],
plugins: [Oban.Plugins.Pruner],
plugins: [{Oban.Plugins.Pruner, max_age: 900}],
crontab: [
{"0 0 * * 0", Pleroma.Workers.Cron.DigestEmailsWorker},
{"0 0 * * *", Pleroma.Workers.Cron.NewUsersDigestWorker}

View file

@ -2101,11 +2101,11 @@
]
},
%{
key: :failure_backoff,
key: :timeout,
type: :integer,
description:
"Amount of milliseconds after request failure, during which the request will not be retried.",
suggestions: [60_000]
"Amount of milliseconds after which the HTTP request is forcibly terminated.",
suggestions: [5_000]
}
]
},

View file

@ -436,7 +436,7 @@ config :pleroma, Pleroma.Web.MediaProxy.Invalidation.Http,
* `ignore_hosts`: list of hosts which will be ignored by the metadata parser. For example `["accounts.google.com", "xss.website"]`, defaults to `[]`.
* `ignore_tld`: list TLDs (top-level domains) which will ignore for parse metadata. default is ["local", "localdomain", "lan"].
* `parsers`: list of Rich Media parsers.
* `failure_backoff`: Amount of milliseconds after request failure, during which the request will not be retried.
* `timeout`: Amount of milliseconds after which the HTTP request is forcibly terminated.
## HTTP server

View file

@ -68,7 +68,9 @@ def request(method, url, body, headers, options) when is_binary(url) do
adapter = Application.get_env(:tesla, :adapter)
client = Tesla.client(adapter_middlewares(adapter), adapter)
extra_middleware = options[:tesla_middleware] || []
client = Tesla.client(adapter_middlewares(adapter, extra_middleware), adapter)
maybe_limit(
fn ->
@ -102,20 +104,21 @@ defp maybe_limit(fun, _, _) do
fun.()
end
defp adapter_middlewares(Tesla.Adapter.Gun) do
[Tesla.Middleware.FollowRedirects, Pleroma.Tesla.Middleware.ConnectionPool]
defp adapter_middlewares(Tesla.Adapter.Gun, extra_middleware) do
[Tesla.Middleware.FollowRedirects, Pleroma.Tesla.Middleware.ConnectionPool] ++
extra_middleware
end
defp adapter_middlewares({Tesla.Adapter.Finch, _}) do
[Tesla.Middleware.FollowRedirects]
defp adapter_middlewares({Tesla.Adapter.Finch, _}, extra_middleware) do
[Tesla.Middleware.FollowRedirects] ++ extra_middleware
end
defp adapter_middlewares(_) do
defp adapter_middlewares(_, extra_middleware) do
if Pleroma.Config.get(:env) == :test do
# Emulate redirects in test env, which are handled by adapters in other environments
[Tesla.Middleware.FollowRedirects]
else
[]
extra_middleware
end
end
end

View file

@ -36,13 +36,9 @@ def run(%{"url" => url} = args) do
:ok
{:error, type} = error
when type in [:invalid_metadata, :body_too_large, :content_type, :validate] ->
when type in [:invalid_metadata, :body_too_large, :content_type, :validate, :get, :head] ->
negative_cache(url_hash)
error
{:error, type} = error
when type in [:get, :head] ->
error
end
end
@ -68,5 +64,5 @@ defp stream_update(%{"activity_id" => activity_id}) do
defp warm_cache(key, val), do: @cachex.put(:rich_media_cache, key, val)
defp negative_cache(key, ttl \\ :timer.minutes(15)),
do: @cachex.put(:rich_media_cache, key, nil, ttl: ttl)
do: @cachex.put(:rich_media_cache, key, :error, ttl: ttl)
end

View file

@ -69,9 +69,12 @@ defp check_content_length(headers) do
end
defp http_options do
timeout = Config.get!([:rich_media, :timeout])
[
pool: :rich_media,
max_body: Config.get([:rich_media, :max_body], 5_000_000)
max_body: Config.get([:rich_media, :max_body], 5_000_000),
tesla_middleware: [{Tesla.Middleware.Timeout, timeout: timeout}]
]
end
end

View file

@ -3,6 +3,7 @@
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Workers.RichMediaWorker do
alias Pleroma.Config
alias Pleroma.Web.RichMedia.Backfill
alias Pleroma.Web.RichMedia.Card
@ -19,18 +20,21 @@ def perform(%Job{args: %{"op" => "backfill", "url" => _url} = args}) do
:ok
{:error, type}
when type in [:invalid_metadata, :body_too_large, :content_type, :validate] ->
when type in [:invalid_metadata, :body_too_large, :content_type, :validate, :get, :head] ->
{:cancel, type}
{:error, type}
when type in [:get, :head] ->
{:error, type}
error ->
{:error, error}
end
end
# There is timeout value enforced by Tesla.Middleware.Timeout
# which can be found in the RichMedia.Helpers module to allow us to detect
# a slow/infinite data stream and insert a negative cache entry for the URL
# We pad it by 2 seconds to be certain a slow connection is detected and we
# can inject a negative cache entry for the URL
@impl Oban.Worker
def timeout(_job), do: :timer.seconds(5)
def timeout(_job) do
Config.get!([:rich_media, :timeout]) + :timer.seconds(2)
end
end

View file

@ -0,0 +1,26 @@
# Pleroma: A lightweight social networking server
# Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.RichMedia.BackfillTest do
use Pleroma.DataCase
alias Pleroma.Web.RichMedia.Backfill
alias Pleroma.Web.RichMedia.Card
import Mox
setup_all do: clear_config([:rich_media, :enabled], true)
test "sets a negative cache entry for an error" do
url = "https://bad.example.com/"
url_hash = Card.url_to_hash(url)
Tesla.Mock.mock(fn %{url: ^url} -> :error end)
Pleroma.CachexMock
|> expect(:put, fn :rich_media_cache, ^url_hash, :error, ttl: _ -> {:ok, true} end)
Backfill.run(%{"url" => url})
end
end