# lib/pleroma/user/backup.ex
# Pleroma: A lightweight social networking server
# Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.User.Backup do
use Ecto.Schema
import Ecto.Changeset
import Ecto.Query
import Pleroma.Web.Gettext
require Logger
require Pleroma.Constants
alias Pleroma.Activity
alias Pleroma.Bookmark
alias Pleroma.Config
alias Pleroma.Repo
alias Pleroma.Uploaders.Uploader
alias Pleroma.User
alias Pleroma.Web.ActivityPub.ActivityPub
alias Pleroma.Web.ActivityPub.Transmogrifier
alias Pleroma.Web.ActivityPub.UserView
alias Pleroma.Workers.BackupWorker
# NOTE: :zip.create/3 expects charlist file names; see @files and run/1 below.
@type t :: %__MODULE__{}

# Database record for a single user data export: archive metadata
# (name, size, content type), a processed flag, and the `tempdir`
# scratch directory used while the archive is assembled.
schema "backups" do
  field(:content_type, :string)
  field(:file_name, :string)
  field(:file_size, :integer, default: 0)
  field(:processed, :boolean, default: false)
  field(:tempdir, :string)

  belongs_to(:user, User, type: FlakeId.Ecto.CompatType)

  timestamps()
end
@doc """
Schedules a job to backup a user if the number of backup requests has not exceeded the limit.

Admins can directly call new/1 and schedule_backup/1 to bypass the limit.
"""
@spec user(User.t()) :: {:ok, t()} | {:error, any()}
def user(user) do
  # Only used for the error message below; permitted?/1 reads it itself.
  days = Config.get([__MODULE__, :limit_days])

  with true <- permitted?(user),
       %__MODULE__{} = backup <- new(user),
       {:ok, inserted_backup} <- Repo.insert(backup),
       {:ok, %Oban.Job{}} <- schedule_backup(inserted_backup) do
    {:ok, inserted_backup}
  else
    false ->
      # Rate limited: the latest export is more recent than `limit_days`.
      {:error,
       dngettext(
         "errors",
         "Last export was less than a day ago",
         "Last export was less than %{days} days ago",
         days,
         days: days
       )}

    e ->
      {:error, e}
  end
end
@doc "Generates a %Backup{} for a user with a random file name"
@spec new(User.t()) :: t()
def new(user) do
  # 32 random bytes keep the file name unguessable and collision-free.
  rand_str = :crypto.strong_rand_bytes(32) |> Base.url_encode64(padding: false)
  datetime = Calendar.NaiveDateTime.Format.iso8601_basic(NaiveDateTime.utc_now())
  name = "archive-#{user.nickname}-#{datetime}-#{rand_str}.zip"

  %__MODULE__{
    content_type: "application/zip",
    file_name: name,
    tempdir: tempdir(),
    user: user
  }
end
@doc "Schedules the execution of the provided backup"
@spec schedule_backup(t()) :: {:ok, Oban.Job.t()} | {:error, any()}
def schedule_backup(backup) do
  # The record must already be persisted so the worker can look it up by id.
  if is_nil(backup.id) do
    {:error, "Backup is missing id. Please insert it into the Repo first."}
  else
    %{"op" => "process", "backup_id" => backup.id}
    |> BackupWorker.new()
    |> Oban.insert()
  end
end
@doc "Deletes the backup archive file and removes the database record"
@spec delete_archive(t()) :: {:ok, Ecto.Schema.t()} | {:error, Ecto.Changeset.t()}
def delete_archive(backup) do
  uploader = Config.get([Pleroma.Upload, :uploader])

  # Drop the DB row only after the uploader confirms the file is gone,
  # so a failed delete can be retried later.
  with :ok <- uploader.delete_file(Path.join("backups", backup.file_name)) do
    Repo.delete(backup)
  end
end
@doc "Schedules a job to delete the backup archive"
@spec schedule_delete(t()) :: {:ok, Oban.Job.t()} | {:error, any()}
def schedule_delete(backup) do
  days = Config.get([__MODULE__, :purge_after_days])
  time = 60 * 60 * 24 * days
  # Core NaiveDateTime.add/2 (unit defaults to :second) replaces the
  # third-party Calendar.NaiveDateTime.add!/2 — same result for whole seconds.
  scheduled_at = NaiveDateTime.add(backup.inserted_at, time)

  %{"op" => "delete", "backup_id" => backup.id}
  |> BackupWorker.new(scheduled_at: scheduled_at)
  |> Oban.insert()
end
# Returns true when the user may request a new backup: either they have no
# previous backup, or the most recent one is older than `limit_days` days.
defp permitted?(user) do
  with {_, %__MODULE__{inserted_at: inserted_at}} <- {:last, get_last(user)},
       days = Config.get([__MODULE__, :limit_days]),
       diff = Timex.diff(NaiveDateTime.utc_now(), inserted_at, :days),
       {_, true} <- {:diff, diff > days} do
    true
  else
    {:last, nil} -> true
    {:diff, false} -> false
  end
end
@doc "Returns last backup for the provided user"
# Spec widened to `t() | nil`: Repo.one/1 returns nil when the user has no
# backups, and permitted?/1 depends on that nil.
@spec get_last(User.t()) :: t() | nil
def get_last(%User{id: user_id}) do
  __MODULE__
  |> where(user_id: ^user_id)
  |> order_by(desc: :id)
  |> limit(1)
  |> Repo.one()
end
@doc "Lists all existing backups for a user"
@spec list(User.t()) :: [Ecto.Schema.t() | term()]
def list(%User{id: user_id}) do
  # Newest first (ids are monotonically increasing FlakeIds).
  __MODULE__
  |> where(user_id: ^user_id)
  |> order_by(desc: :id)
  |> Repo.all()
end
@doc "Schedules deletion of all but the most recent backup"
@spec remove_outdated(User.t()) :: :ok
def remove_outdated(user) do
  with %__MODULE__{} = latest_backup <- get_last(user) do
    __MODULE__
    |> where(user_id: ^user.id)
    |> where([b], b.id != ^latest_backup.id)
    |> Repo.all()
    |> Enum.each(&schedule_delete/1)
  else
    # No backups at all — nothing to purge.
    _ -> :ok
  end
end
@doc "Fetches a backup record by its primary key, or nil if absent"
def get_by_id(id) do
  Repo.get(__MODULE__, id)
end
@doc "Generates changeset for %Pleroma.User.Backup{}"
@spec changeset(%__MODULE__{}, map()) :: %Ecto.Changeset{}
def changeset(backup \\ %__MODULE__{}, attrs) do
  # No validations: all fields are produced internally, never by user input.
  cast(backup, attrs, [:content_type, :file_name, :file_size, :processed, :tempdir])
end
@doc "Updates the backup record"
@spec update_record(%__MODULE__{}, map()) :: {:ok, %__MODULE__{}} | {:error, %Ecto.Changeset{}}
def update_record(%__MODULE__{} = backup, attrs) do
  backup
  |> changeset(attrs)
  |> Repo.update()
end
# Files that make up a finished archive. Charlists, because Erlang's
# :zip.create/3 takes :file.name() (charlist) entries.
@files [
  ~c"actor.json",
  ~c"outbox.json",
  ~c"likes.json",
  ~c"bookmarks.json",
  ~c"followers.json",
  ~c"following.json"
]

@doc "Exports the user's data into a zip archive inside `backup.tempdir`"
@spec run(t()) :: {:ok, t()} | {:error, :failed}
def run(%__MODULE__{} = backup) do
  backup = Repo.preload(backup, :user)
  tempfile = Path.join([backup.tempdir, backup.file_name])

  # Each step is tagged so a failure falls through to the catch-all below.
  with {_, :ok} <- {:mkdir, File.mkdir_p(backup.tempdir)},
       {_, :ok} <- {:actor, actor(backup.tempdir, backup.user)},
       {_, :ok} <- {:statuses, statuses(backup.tempdir, backup.user)},
       {_, :ok} <- {:likes, likes(backup.tempdir, backup.user)},
       {_, :ok} <- {:bookmarks, bookmarks(backup.tempdir, backup.user)},
       {_, :ok} <- {:followers, followers(backup.tempdir, backup.user)},
       {_, :ok} <- {:following, following(backup.tempdir, backup.user)},
       {_, {:ok, _zip_path}} <-
         {:zip, :zip.create(to_charlist(tempfile), @files, cwd: to_charlist(backup.tempdir))},
       {_, {:ok, %File.Stat{size: zip_size}}} <- {:filestat, File.stat(tempfile)},
       {:ok, updated_backup} <- update_record(backup, %{file_size: zip_size}) do
    {:ok, updated_backup}
  else
    _ ->
      # Best-effort cleanup of partial output before reporting failure.
      File.rm_rf(backup.tempdir)
      {:error, :failed}
  end
end
# Returns a fresh, randomly named scratch directory path for one backup run,
# rooted at the configured :tempdir or, by default, the system temp dir.
# The directory is only created later by run/1 (File.mkdir_p).
defp tempdir do
  rand = :crypto.strong_rand_bytes(8) |> Base.url_encode64(padding: false)
  subdir = "backup-#{rand}"

  case Config.get([__MODULE__, :tempdir]) do
    nil ->
      Path.join([System.tmp_dir!(), subdir])

    path ->
      Path.join([path, subdir])
  end
end
@doc "Uploads the completed backup and marks it as processed"
@spec upload(t()) :: {:ok, t()}
def upload(%__MODULE__{tempdir: tempdir} = backup) when is_binary(tempdir) do
  uploader = Config.get([Pleroma.Upload, :uploader])

  upload = %Pleroma.Upload{
    name: backup.file_name,
    tempfile: Path.join([tempdir, backup.file_name]),
    content_type: backup.content_type,
    path: Path.join("backups", backup.file_name)
  }

  # The scratch directory is removed only after both the upload and the
  # DB update succeed; any earlier failure falls through unchanged.
  with {:ok, _} <- Uploader.put_file(uploader, upload),
       {:ok, uploaded_backup} <- update_record(backup, %{processed: true}),
       {:ok, _} <- File.rm_rf(tempdir) do
    {:ok, uploaded_backup}
  end
end
# Writes the user's rendered actor document to `dir`/actor.json, pointing
# the "likes" and "bookmarks" collections at their sibling archive files.
defp actor(dir, user) do
  with {:ok, json} <-
         UserView.render("user.json", %{user: user})
         |> Map.merge(%{"likes" => "likes.json", "bookmarks" => "bookmarks.json"})
         |> Jason.encode() do
    File.write(Path.join(dir, "actor.json"), json)
  end
end
# Writes the opening of an OrderedCollection JSON document to `file`.
# write/4 appends the items afterwards and seeks back over the trailing
# two bytes to close the array, so the header deliberately ends with an
# open "orderedItems": [ followed by a blank line.
defp write_header(file, name) do
  IO.write(
    file,
    """
    {
      "@context": "https://www.w3.org/ns/activitystreams",
      "id": "#{name}.json",
      "type": "OrderedCollection",
      "orderedItems": [

    """
  )
end
# Streams `query` from the database in chunks, maps each row through `fun`,
# and writes the results as entries of an OrderedCollection at
# `dir`/`name`.json. Items that fail to transform or encode are logged and
# skipped (best-effort export), and `totalItems` counts only written entries.
defp write(query, dir, name, fun) do
  path = Path.join(dir, "#{name}.json")

  chunk_size = Config.get([__MODULE__, :process_chunk_size])

  with {:ok, file} <- File.open(path, [:write, :utf8]),
       :ok <- write_header(file, name) do
    total =
      query
      |> Pleroma.Repo.chunk_stream(chunk_size, _returns_as = :one, timeout: :infinity)
      |> Enum.reduce(0, fn i, acc ->
        # `fun` may raise on malformed rows; treat that like any other error.
        with {:ok, data} <-
               (try do
                  fun.(i)
                rescue
                  e -> {:error, e}
                end),
             {:ok, str} <- Jason.encode(data),
             :ok <- IO.write(file, str <> ",\n") do
          acc + 1
        else
          {:error, e} ->
            Logger.warning(
              "Error processing backup item: #{inspect(e)}\n The item is: #{inspect(i)}"
            )

            acc

          _ ->
            acc
        end
      end)

    # Seek back over the trailing ",\n" (or the header's blank line when no
    # items were written) and terminate the JSON document.
    with :ok <- :file.pwrite(file, {:eof, -2}, "\n],\n \"totalItems\": #{total}}") do
      File.close(file)
    end
  end
end
# Exports the object ids of the user's bookmarked activities to
# bookmarks.json (the activities' "object" field, pulled via SQL fragment).
defp bookmarks(dir, %{id: user_id} = _user) do
  Bookmark
  |> where(user_id: ^user_id)
  |> join(:inner, [b], activity in assoc(b, :activity))
  |> select([b, a], %{id: b.id, object: fragment("(?)->>'object'", a.data)})
  |> write(dir, "bookmarks", fn a -> {:ok, a.object} end)
end
# Exports the object ids of the user's Like activities to likes.json.
defp likes(dir, user) do
  user.ap_id
  |> Activity.Queries.by_actor()
  |> Activity.Queries.by_type("Like")
  |> select([like], %{id: like.id, object: fragment("(?)->>'object'", like.data)})
  |> write(dir, "likes", fn a -> {:ok, a.object} end)
end
# Exports the user's own Create/Announce activities to outbox.json,
# fetched across the public timeline, their followed accounts, and their
# list memberships, then normalized for federation output.
defp statuses(dir, user) do
  opts =
    %{}
    |> Map.put(:type, ["Create", "Announce"])
    |> Map.put(:actor_id, user.ap_id)

  [
    [Pleroma.Constants.as_public(), user.ap_id],
    User.following(user),
    Pleroma.List.memberships(user)
  ]
  |> Enum.concat()
  |> ActivityPub.fetch_activities_query(opts)
  |> write(
    dir,
    "outbox",
    fn a ->
      with {:ok, activity} <- Transmogrifier.prepare_outgoing(a.data) do
        # Per-item "@context" is redundant inside a collection document.
        {:ok, Map.delete(activity, "@context")}
      end
    end
  )
end
# Exports the AP ids of the user's followers to followers.json.
defp followers(dir, user) do
  user
  |> User.get_followers_query()
  |> write(dir, "followers", &{:ok, &1.ap_id})
end
# Exports the AP ids of accounts the user follows to following.json.
defp following(dir, user) do
  user
  |> User.get_friends_query()
  |> write(dir, "following", &{:ok, &1.ap_id})
end
end