2023-03-12 18:00:57 +03:00
// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
2021-08-10 14:32:39 +03:00
package dereferencing
import (
"context"
"errors"
2023-05-12 12:15:54 +03:00
"io"
2021-08-10 14:32:39 +03:00
"net/url"
2023-10-25 17:04:53 +03:00
"slices"
2023-11-04 23:21:20 +03:00
"time"
2023-10-25 17:04:53 +03:00
2021-08-10 14:32:39 +03:00
"github.com/superseriousbusiness/gotosocial/internal/ap"
2022-11-29 12:24:55 +03:00
"github.com/superseriousbusiness/gotosocial/internal/config"
2022-09-12 14:03:23 +03:00
"github.com/superseriousbusiness/gotosocial/internal/db"
2023-05-31 11:39:54 +03:00
"github.com/superseriousbusiness/gotosocial/internal/gtscontext"
2023-05-28 15:08:35 +03:00
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
2021-08-10 14:32:39 +03:00
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/id"
2022-07-19 11:47:55 +03:00
"github.com/superseriousbusiness/gotosocial/internal/log"
2022-01-09 20:41:22 +03:00
"github.com/superseriousbusiness/gotosocial/internal/media"
2023-03-01 20:52:44 +03:00
"github.com/superseriousbusiness/gotosocial/internal/transport"
2023-10-31 14:05:17 +03:00
"github.com/superseriousbusiness/gotosocial/internal/util"
2021-08-10 14:32:39 +03:00
)
2024-02-09 17:24:49 +03:00
// statusFresh returns true if the given status is still
// considered "fresh" according to the desired freshness
// window (falls back to default status freshness if nil).
//
// Local statuses will always be considered fresh,
// because there's no remote state that may have changed.
//
// Return value of false indicates that the status
// is not fresh and should be refreshed from remote.
func statusFresh (
status * gtsmodel . Status ,
window * FreshnessWindow ,
) bool {
// Take default if no
// freshness window preferred.
if window == nil {
window = DefaultStatusFreshness
2023-12-15 17:24:39 +03:00
}
2024-02-09 17:24:49 +03:00
if status . IsLocal ( ) {
// Can't refresh
// local statuses.
2023-05-12 12:15:54 +03:00
return true
2022-11-15 21:45:15 +03:00
}
2023-05-12 12:15:54 +03:00
2024-02-09 17:24:49 +03:00
// Moment when the status is
// considered stale according to
// desired freshness window.
staleAt := status . FetchedAt . Add (
time . Duration ( * window ) ,
)
// It's still fresh if the time now
// is not past the point of staleness.
return ! time . Now ( ) . After ( staleAt )
2021-08-10 14:32:39 +03:00
}
2023-11-04 23:21:20 +03:00
// GetStatusByURI will attempt to fetch a status by its URI, first checking the database. In the case of a newly-met remote model, or a remote model whose 'last_fetched' date
// is beyond a certain interval, the status will be dereferenced. In the case of dereferencing, some low-priority status information may be enqueued for asynchronous fetching,
// e.g. dereferencing the status thread. Param 'syncParent' = true indicates to fetch status ancestors synchronously. An ActivityPub object indicates the status was dereferenced.
2023-10-23 12:58:13 +03:00
func ( d * Dereferencer ) GetStatusByURI ( ctx context . Context , requestUser string , uri * url . URL ) ( * gtsmodel . Status , ap . Statusable , error ) {
2024-01-31 16:29:47 +03:00
// Fetch and dereference / update status if necessary.
2023-11-04 23:21:20 +03:00
status , statusable , isNew , err := d . getStatusByURI ( ctx ,
2023-05-12 12:15:54 +03:00
requestUser ,
uri ,
)
2024-01-31 16:29:47 +03:00
2023-05-12 12:15:54 +03:00
if err != nil {
2024-01-31 16:29:47 +03:00
if status == nil {
// err with no existing
// status for fallback.
return nil , nil , err
}
log . Errorf ( ctx , "error updating status %s: %v" , uri , err )
} else if statusable != nil {
2023-05-12 12:15:54 +03:00
2023-11-04 23:21:20 +03:00
// Deref parents + children.
d . dereferenceThread ( ctx ,
requestUser ,
uri ,
status ,
statusable ,
isNew ,
)
2023-05-12 12:15:54 +03:00
}
2023-11-04 23:21:20 +03:00
return status , statusable , nil
2023-05-12 12:15:54 +03:00
}
2024-01-31 16:29:47 +03:00
// getStatusByURI is a package internal form of .GetStatusByURI() that doesn't dereference thread on update, and may return an existing status with error on failed re-fetch.
2023-11-04 23:21:20 +03:00
func ( d * Dereferencer ) getStatusByURI ( ctx context . Context , requestUser string , uri * url . URL ) ( * gtsmodel . Status , ap . Statusable , bool , error ) {
2023-05-12 12:15:54 +03:00
var (
status * gtsmodel . Status
uriStr = uri . String ( )
err error
)
2024-01-31 16:29:47 +03:00
// Search the database for existing by URI.
2023-05-31 11:39:54 +03:00
status , err = d . state . DB . GetStatusByURI (
2024-01-31 16:29:47 +03:00
2023-05-31 11:39:54 +03:00
// request a barebones object, it may be in the
// db but with related models not yet dereferenced.
gtscontext . SetBarebones ( ctx ) ,
uriStr ,
)
2023-05-12 12:15:54 +03:00
if err != nil && ! errors . Is ( err , db . ErrNoEntries ) {
2023-11-04 23:21:20 +03:00
return nil , nil , false , gtserror . Newf ( "error checking database for status %s by uri: %w" , uriStr , err )
2021-08-10 14:32:39 +03:00
}
2022-11-29 12:24:55 +03:00
if status == nil {
2024-01-31 16:29:47 +03:00
// Else, search database for existing by URL.
2023-05-31 11:39:54 +03:00
status , err = d . state . DB . GetStatusByURL (
gtscontext . SetBarebones ( ctx ) ,
uriStr ,
)
2023-05-12 12:15:54 +03:00
if err != nil && ! errors . Is ( err , db . ErrNoEntries ) {
2023-11-04 23:21:20 +03:00
return nil , nil , false , gtserror . Newf ( "error checking database for status %s by url: %w" , uriStr , err )
2022-11-29 12:24:55 +03:00
}
2022-05-23 18:40:03 +03:00
}
2023-05-12 12:15:54 +03:00
if status == nil {
2024-01-31 16:29:47 +03:00
// Ensure not a failed search for a local
// status, if so we know it doesn't exist.
if uri . Host == config . GetHost ( ) ||
uri . Host == config . GetAccountDomain ( ) {
return nil , nil , false , gtserror . SetUnretrievable ( err )
2022-11-29 12:24:55 +03:00
}
2023-05-12 12:15:54 +03:00
// Create and pass-through a new bare-bones model for deref.
2023-10-31 14:12:22 +03:00
return d . enrichStatusSafely ( ctx , requestUser , uri , & gtsmodel . Status {
2024-01-31 16:29:47 +03:00
Local : util . Ptr ( false ) ,
2023-05-12 12:15:54 +03:00
URI : uriStr ,
} , nil )
2022-11-29 12:24:55 +03:00
}
2024-02-09 17:24:49 +03:00
if statusFresh ( status , DefaultStatusFreshness ) {
2024-01-09 12:42:39 +03:00
// This is an existing status that is up-to-date,
// before returning ensure it is fully populated.
2023-05-31 11:39:54 +03:00
if err := d . state . DB . PopulateStatus ( ctx , status ) ; err != nil {
log . Errorf ( ctx , "error populating existing status: %v" , err )
}
2024-01-09 12:42:39 +03:00
2023-11-04 23:21:20 +03:00
return status , nil , false , nil
2023-05-31 11:39:54 +03:00
}
2024-01-31 16:29:47 +03:00
// Try to deref and update existing status model.
2023-11-04 23:21:20 +03:00
latest , statusable , isNew , err := d . enrichStatusSafely ( ctx ,
2023-05-12 12:15:54 +03:00
requestUser ,
uri ,
status ,
nil ,
)
2023-03-01 20:52:44 +03:00
2024-01-31 16:29:47 +03:00
if err != nil {
// fallback to the
// existing status.
latest = status
statusable = nil
2021-08-10 14:32:39 +03:00
}
2024-01-31 16:29:47 +03:00
return latest , statusable , isNew , err
2023-05-12 12:15:54 +03:00
}
2021-08-10 14:32:39 +03:00
2023-11-04 23:21:20 +03:00
// RefreshStatus is functionally equivalent to GetStatusByURI(), except that it requires a pre
// populated status model (with AT LEAST uri set), and ALL thread dereferencing is asynchronous.
func ( d * Dereferencer ) RefreshStatus (
ctx context . Context ,
requestUser string ,
status * gtsmodel . Status ,
statusable ap . Statusable ,
2024-02-09 17:24:49 +03:00
window * FreshnessWindow ,
2023-11-04 23:21:20 +03:00
) ( * gtsmodel . Status , ap . Statusable , error ) {
2024-01-09 12:42:39 +03:00
// If no incoming data is provided,
// check whether status needs update.
if statusable == nil &&
2024-02-09 17:24:49 +03:00
statusFresh ( status , window ) {
2023-05-12 12:15:54 +03:00
return status , nil , nil
2021-08-10 14:32:39 +03:00
}
2023-05-12 12:15:54 +03:00
// Parse the URI from status.
uri , err := url . Parse ( status . URI )
2021-08-10 14:32:39 +03:00
if err != nil {
2023-05-28 15:08:35 +03:00
return nil , nil , gtserror . Newf ( "invalid status uri %q: %w" , status . URI , err )
2021-08-10 14:32:39 +03:00
}
2023-11-04 23:21:20 +03:00
// Try to update + dereference the passed status model.
latest , statusable , isNew , err := d . enrichStatusSafely ( ctx ,
2023-05-12 12:15:54 +03:00
requestUser ,
uri ,
status ,
2023-11-04 23:21:20 +03:00
statusable ,
2023-05-12 12:15:54 +03:00
)
2022-05-23 18:40:03 +03:00
if err != nil {
2023-05-12 12:15:54 +03:00
return nil , nil , err
2022-05-23 18:40:03 +03:00
}
2021-08-10 14:32:39 +03:00
2023-11-04 23:21:20 +03:00
if statusable != nil {
// Deref parents + children.
d . dereferenceThread ( ctx ,
requestUser ,
uri ,
2024-02-18 12:49:40 +03:00
latest ,
2023-11-04 23:21:20 +03:00
statusable ,
isNew ,
)
}
2021-08-10 14:32:39 +03:00
2023-11-04 23:21:20 +03:00
return latest , statusable , nil
2021-08-10 14:32:39 +03:00
}
2023-11-04 23:21:20 +03:00
// RefreshStatusAsync is functionally equivalent to RefreshStatus(), except that ALL
// dereferencing is queued for asynchronous processing, (both thread AND status).
func ( d * Dereferencer ) RefreshStatusAsync (
ctx context . Context ,
requestUser string ,
status * gtsmodel . Status ,
statusable ap . Statusable ,
2024-02-09 17:24:49 +03:00
window * FreshnessWindow ,
2023-11-04 23:21:20 +03:00
) {
2024-01-09 12:42:39 +03:00
// If no incoming data is provided,
// check whether status needs update.
if statusable == nil &&
2024-02-09 17:24:49 +03:00
statusFresh ( status , window ) {
2023-05-12 12:15:54 +03:00
return
2021-08-10 14:32:39 +03:00
}
2023-05-12 12:15:54 +03:00
// Parse the URI from status.
uri , err := url . Parse ( status . URI )
2021-08-10 14:32:39 +03:00
if err != nil {
2023-05-28 15:08:35 +03:00
log . Errorf ( ctx , "invalid status uri %q: %v" , status . URI , err )
2023-05-12 12:15:54 +03:00
return
2021-08-10 14:32:39 +03:00
}
2023-11-04 23:21:20 +03:00
// Enqueue a worker function to re-fetch this status entirely async.
2023-05-12 12:15:54 +03:00
d . state . Workers . Federator . MustEnqueueCtx ( ctx , func ( ctx context . Context ) {
2023-11-04 23:21:20 +03:00
latest , statusable , _ , err := d . enrichStatusSafely ( ctx ,
requestUser ,
uri ,
status ,
statusable ,
)
2023-05-12 12:15:54 +03:00
if err != nil {
log . Errorf ( ctx , "error enriching remote status: %v" , err )
return
}
2023-11-04 23:21:20 +03:00
if statusable != nil {
if err := d . DereferenceStatusAncestors ( ctx , requestUser , latest ) ; err != nil {
log . Error ( ctx , err )
}
if err := d . DereferenceStatusDescendants ( ctx , requestUser , uri , statusable ) ; err != nil {
log . Error ( ctx , err )
}
2023-10-31 14:12:22 +03:00
}
2023-05-12 12:15:54 +03:00
} )
2021-08-10 14:32:39 +03:00
}
2023-10-31 14:12:22 +03:00
// enrichStatusSafely wraps enrichStatus() to perform
// it within the State{}.FedLocks mutexmap, which protects
// dereferencing actions with per-URI mutex locks.
func ( d * Dereferencer ) enrichStatusSafely (
ctx context . Context ,
requestUser string ,
uri * url . URL ,
status * gtsmodel . Status ,
apubStatus ap . Statusable ,
2023-11-04 23:21:20 +03:00
) ( * gtsmodel . Status , ap . Statusable , bool , error ) {
2023-10-31 14:12:22 +03:00
uriStr := status . URI
2023-11-30 14:32:45 +03:00
var isNew bool
// Check if this is a new status (to us).
if isNew = ( status . ID == "" ) ; ! isNew {
2023-10-31 14:12:22 +03:00
// This is an existing status, first try to populate it. This
// is required by the checks below for existing tags, media etc.
if err := d . state . DB . PopulateStatus ( ctx , status ) ; err != nil {
log . Errorf ( ctx , "error populating existing status %s: %v" , uriStr , err )
}
}
// Acquire per-URI deref lock, wraping unlock
// to safely defer in case of panic, while still
// performing more granular unlocks when needed.
unlock := d . state . FedLocks . Lock ( uriStr )
2024-02-09 14:38:51 +03:00
unlock = util . DoOnce ( unlock )
2023-10-31 14:12:22 +03:00
defer unlock ( )
// Perform status enrichment with passed vars.
latest , apubStatus , err := d . enrichStatus ( ctx ,
requestUser ,
uri ,
status ,
apubStatus ,
)
2024-01-31 16:29:47 +03:00
if gtserror . StatusCode ( err ) >= 400 {
2024-01-26 16:17:10 +03:00
if isNew {
// This was a new status enrich
// attempt which failed before we
// got to store it, so we can't
// return anything useful.
return nil , nil , isNew , err
}
// We had this status stored already
// before this enrichment attempt.
//
// Update fetched_at to slow re-attempts
// but don't return early. We can still
// return the model we had stored already.
2023-10-31 14:12:22 +03:00
status . FetchedAt = time . Now ( )
2024-01-26 16:17:10 +03:00
if err := d . state . DB . UpdateStatus ( ctx , status , "fetched_at" ) ; err != nil {
2024-01-31 16:29:47 +03:00
log . Error ( ctx , "error updating %s fetched_at: %v" , uriStr , err )
2024-01-26 16:17:10 +03:00
}
2023-10-31 14:12:22 +03:00
}
// Unlock now
// we're done.
unlock ( )
if errors . Is ( err , db . ErrAlreadyExists ) {
// Ensure AP model isn't set,
// otherwise this indicates WE
// enriched the status.
apubStatus = nil
2023-11-30 14:32:45 +03:00
// We leave 'isNew' set so that caller
// still dereferences parents, otherwise
// the version we pass back may not have
// these attached as inReplyTos yet (since
// those happen OUTSIDE federator lock).
//
// TODO: performance-wise, this won't be
// great. should improve this if we can!
2023-10-31 14:12:22 +03:00
// DATA RACE! We likely lost out to another goroutine
// in a call to db.Put(Status). Look again in DB by URI.
latest , err = d . state . DB . GetStatusByURI ( ctx , status . URI )
if err != nil {
err = gtserror . Newf ( "error getting status %s from database after race: %w" , uriStr , err )
}
}
2023-11-04 23:21:20 +03:00
return latest , apubStatus , isNew , err
2023-10-31 14:12:22 +03:00
}
2023-06-24 10:32:10 +03:00
// enrichStatus will enrich the given status, whether a new
// barebones model, or existing model from the database.
// It handles necessary dereferencing, database updates, etc.
2023-10-23 12:58:13 +03:00
func ( d * Dereferencer ) enrichStatus (
2023-06-24 10:32:10 +03:00
ctx context . Context ,
requestUser string ,
uri * url . URL ,
status * gtsmodel . Status ,
apubStatus ap . Statusable ,
) ( * gtsmodel . Status , ap . Statusable , error ) {
2023-05-12 12:15:54 +03:00
// Pre-fetch a transport for requesting username, used by later dereferencing.
tsport , err := d . transportController . NewTransportForUsername ( ctx , requestUser )
2021-08-10 14:32:39 +03:00
if err != nil {
2023-05-28 15:08:35 +03:00
return nil , nil , gtserror . Newf ( "couldn't create transport: %w" , err )
2021-08-10 14:32:39 +03:00
}
2023-05-12 12:15:54 +03:00
// Check whether this account URI is a blocked domain / subdomain.
if blocked , err := d . state . DB . IsDomainBlocked ( ctx , uri . Host ) ; err != nil {
2023-05-28 15:08:35 +03:00
return nil , nil , gtserror . Newf ( "error checking blocked domain: %w" , err )
2023-05-12 12:15:54 +03:00
} else if blocked {
2023-06-24 10:32:10 +03:00
err = gtserror . Newf ( "%s is blocked" , uri . Host )
return nil , nil , gtserror . SetUnretrievable ( err )
2021-08-10 14:32:39 +03:00
}
2023-05-12 12:15:54 +03:00
if apubStatus == nil {
// Dereference latest version of the status.
2024-02-23 18:24:40 +03:00
rsp , err := tsport . Dereference ( ctx , uri )
2021-08-10 14:32:39 +03:00
if err != nil {
2024-01-26 16:17:10 +03:00
err := gtserror . Newf ( "error dereferencing %s: %w" , uri , err )
2023-06-22 22:46:36 +03:00
return nil , nil , gtserror . SetUnretrievable ( err )
2021-08-10 14:32:39 +03:00
}
2024-02-23 18:24:40 +03:00
// Attempt to resolve ActivityPub status from response.
apubStatus , err = ap . ResolveStatusable ( ctx , rsp . Body )
// Tidy up now done.
_ = rsp . Body . Close ( )
2023-05-12 12:15:54 +03:00
if err != nil {
2024-02-23 18:24:40 +03:00
// ResolveStatusable will set gtserror.WrongType
// on the returned error, so we don't need to do it here.
err = gtserror . Newf ( "error resolving statusable %s: %w" , uri , err )
return nil , nil , err
}
// Check whether input URI and final returned URI
// have changed (i.e. we followed some redirects).
if finalURIStr := rsp . Request . URL . String ( ) ; //
finalURIStr != uri . String ( ) {
// NOTE: this URI check + database call is performed
// AFTER reading and closing response body, for performance.
//
// Check whether we have this status stored under *final* URI.
alreadyStatus , err := d . state . DB . GetStatusByURI ( ctx , finalURIStr )
if err != nil && ! errors . Is ( err , db . ErrNoEntries ) {
return nil , nil , gtserror . Newf ( "db error getting status after redirects: %w" , err )
}
if alreadyStatus != nil {
// We had this status stored
// under discovered final URI.
//
// Proceed with this status.
status = alreadyStatus
}
// Update the input URI to
// the final determined URI
// for later URI checks.
uri = rsp . Request . URL
2023-05-12 12:15:54 +03:00
}
}
2021-08-10 14:32:39 +03:00
2023-06-17 18:49:11 +03:00
// Get the attributed-to account in order to fetch profile.
attributedTo , err := ap . ExtractAttributedToURI ( apubStatus )
2023-05-12 12:15:54 +03:00
if err != nil {
2023-05-28 15:08:35 +03:00
return nil , nil , gtserror . New ( "attributedTo was empty" )
2022-09-12 14:03:23 +03:00
}
2021-08-10 14:32:39 +03:00
2023-10-31 14:12:22 +03:00
// Ensure we have the author account of the status dereferenced (+ up-to-date). If this is a new status
2024-02-14 14:13:38 +03:00
// (i.e. status.AccountID == "") then any error here is irrecoverable. status.AccountID must ALWAYS be set.
2023-10-31 14:12:22 +03:00
if _ , _ , err := d . getAccountByURI ( ctx , requestUser , attributedTo ) ; err != nil && status . AccountID == "" {
return nil , nil , gtserror . Newf ( "failed to dereference status author %s: %w" , uri , err )
2021-08-29 13:03:08 +03:00
}
2023-06-22 22:46:36 +03:00
// ActivityPub model was recently dereferenced, so assume that passed status
// may contain out-of-date information, convert AP model to our GTS model.
2023-09-23 19:44:11 +03:00
latestStatus , err := d . converter . ASStatusToStatus ( ctx , apubStatus )
2023-06-22 22:46:36 +03:00
if err != nil {
return nil , nil , gtserror . Newf ( "error converting statusable to gts model for status %s: %w" , uri , err )
2021-08-29 13:03:08 +03:00
}
2024-02-14 14:13:38 +03:00
// Ensure final status isn't attempting
// to claim being authored by local user.
if latestStatus . Account . IsLocal ( ) {
return nil , nil , gtserror . Newf (
"dereferenced status %s claiming to be local" ,
latestStatus . URI ,
)
}
// Ensure the final parsed status URI / URL matches
// the input URI we fetched (or received) it as.
if expect := uri . String ( ) ; latestStatus . URI != expect &&
latestStatus . URL != expect {
return nil , nil , gtserror . Newf (
"dereferenced status uri %s does not match %s" ,
latestStatus . URI , expect ,
)
}
var isNew bool
2024-01-31 16:29:47 +03:00
// Based on the original provided
// status model, determine whether
// this is a new insert / update.
if isNew = ( status . ID == "" ) ; isNew {
// Generate new status ID from the provided creation date.
2023-05-12 12:15:54 +03:00
latestStatus . ID , err = id . NewULIDFromTime ( latestStatus . CreatedAt )
if err != nil {
2023-11-08 17:32:17 +03:00
log . Errorf ( ctx , "invalid created at date (falling back to 'now'): %v" , err )
latestStatus . ID = id . NewULID ( ) // just use "now"
2021-08-10 14:32:39 +03:00
}
2024-01-26 16:17:10 +03:00
} else {
2024-01-31 16:29:47 +03:00
2024-01-26 16:17:10 +03:00
// Reuse existing status ID.
latestStatus . ID = status . ID
2023-05-12 12:15:54 +03:00
}
2021-08-10 14:32:39 +03:00
2023-05-12 12:15:54 +03:00
// Carry-over values and set fetch time.
2023-11-04 23:21:20 +03:00
latestStatus . UpdatedAt = status . UpdatedAt
2023-05-12 12:15:54 +03:00
latestStatus . FetchedAt = time . Now ( )
latestStatus . Local = status . Local
2024-03-04 15:30:12 +03:00
// Check if this is a permitted status we should accept.
permit , err := d . isPermittedStatus ( ctx , status , latestStatus )
if err != nil {
return nil , nil , gtserror . Newf ( "error checking permissibility for status %s: %w" , uri , err )
}
if ! permit {
// Return a checkable error type that can be ignored.
err := gtserror . Newf ( "dropping unpermitted status: %s" , uri )
return nil , nil , gtserror . SetNotPermitted ( err )
2023-11-08 17:32:17 +03:00
}
2023-05-12 12:15:54 +03:00
// Ensure the status' mentions are populated, and pass in existing to check for changes.
if err := d . fetchStatusMentions ( ctx , requestUser , status , latestStatus ) ; err != nil {
2023-05-28 15:08:35 +03:00
return nil , nil , gtserror . Newf ( "error populating mentions for status %s: %w" , uri , err )
2023-05-12 12:15:54 +03:00
}
2024-03-04 15:30:12 +03:00
// Ensure the status' poll remains consistent, else reset the poll.
if err := d . fetchStatusPoll ( ctx , status , latestStatus ) ; err != nil {
return nil , nil , gtserror . Newf ( "error populating poll for status %s: %w" , uri , err )
}
2023-10-25 17:04:53 +03:00
// Now that we know who this status replies to (handled by ASStatusToStatus)
// and who it mentions, we can add a ThreadID to it if necessary.
if err := d . threadStatus ( ctx , latestStatus ) ; err != nil {
return nil , nil , gtserror . Newf ( "error checking / creating threadID for status %s: %w" , uri , err )
}
2023-10-04 15:09:42 +03:00
// Ensure the status' tags are populated, (changes are expected / okay).
2023-10-31 14:12:22 +03:00
if err := d . fetchStatusTags ( ctx , status , latestStatus ) ; err != nil {
2023-07-31 16:47:35 +03:00
return nil , nil , gtserror . Newf ( "error populating tags for status %s: %w" , uri , err )
}
2023-05-12 12:15:54 +03:00
// Ensure the status' media attachments are populated, passing in existing to check for changes.
if err := d . fetchStatusAttachments ( ctx , tsport , status , latestStatus ) ; err != nil {
2023-05-28 15:08:35 +03:00
return nil , nil , gtserror . Newf ( "error populating attachments for status %s: %w" , uri , err )
2023-05-12 12:15:54 +03:00
}
2023-10-04 15:09:42 +03:00
// Ensure the status' emoji attachments are populated, (changes are expected / okay).
if err := d . fetchStatusEmojis ( ctx , requestUser , latestStatus ) ; err != nil {
2023-05-28 15:08:35 +03:00
return nil , nil , gtserror . Newf ( "error populating emojis for status %s: %w" , uri , err )
2023-05-12 12:15:54 +03:00
}
2024-01-26 16:17:10 +03:00
if isNew {
2023-05-12 12:15:54 +03:00
// This is new, put the status in the database.
err := d . state . DB . PutStatus ( ctx , latestStatus )
2021-08-10 14:32:39 +03:00
if err != nil {
2023-05-28 15:08:35 +03:00
return nil , nil , gtserror . Newf ( "error putting in database: %w" , err )
2021-08-10 14:32:39 +03:00
}
2023-05-12 12:15:54 +03:00
} else {
// This is an existing status, update the model in the database.
if err := d . state . DB . UpdateStatus ( ctx , latestStatus ) ; err != nil {
2023-05-28 15:08:35 +03:00
return nil , nil , gtserror . Newf ( "error updating database: %w" , err )
2023-05-12 12:15:54 +03:00
}
}
return latestStatus , apubStatus , nil
}
2021-08-10 14:32:39 +03:00
2024-03-04 15:30:12 +03:00
// isPermittedStatus returns whether the given status
// is permitted to be stored on this instance, checking
// whether the author is suspended, and passes visibility
// checks against status being replied-to (if any).
func ( d * Dereferencer ) isPermittedStatus (
ctx context . Context ,
existing * gtsmodel . Status ,
status * gtsmodel . Status ,
) (
permitted bool , // is permitted?
err error ,
) {
// our failure condition handling
// at the end of this function for
// the case of permission = false.
onFail := func ( ) ( bool , error ) {
if existing != nil {
log . Infof ( ctx , "deleting unpermitted: %s" , existing . URI )
// Delete existing status from database as it's no longer permitted.
if err := d . state . DB . DeleteStatusByID ( ctx , existing . ID ) ; err != nil {
log . Errorf ( ctx , "error deleting %s after permissivity fail: %v" , existing . URI , err )
}
}
return false , nil
}
if ! status . Account . SuspendedAt . IsZero ( ) {
// The status author is suspended,
// this shouldn't have reached here
// but it's a fast check anyways.
return onFail ( )
}
if status . InReplyToURI == "" {
// This status isn't in
// reply to anything!
return true , nil
}
if status . InReplyTo == nil {
// If no inReplyTo has been set,
// we return here for now as we
// can't perform further checks.
//
// Worst case we allow something
// through, and later on during
// refetch it will get deleted.
return true , nil
}
if status . InReplyTo . BoostOfID != "" {
// We do not permit replies to
// boost wrapper statuses. (this
// shouldn't be able to happen).
return onFail ( )
}
2024-03-14 19:55:35 +03:00
// Default to true
permitted = true
if * status . InReplyTo . Local {
// Check visibility of inReplyTo to status author.
permitted , err = d . visibility . StatusVisible ( ctx ,
status . Account ,
status . InReplyTo ,
)
if err != nil {
return false , gtserror . Newf ( "error checking in-reply-to visibility: %w" , err )
}
2024-03-04 15:30:12 +03:00
}
if permitted &&
* status . InReplyTo . Replyable {
2024-03-14 19:55:35 +03:00
// Status is reply-able to.
2024-03-04 15:30:12 +03:00
return true , nil
}
return onFail ( )
}
2023-10-31 14:05:17 +03:00
// populateMentionTarget tries to populate the given
// mention with the correct TargetAccount and (if not
// yet set) TargetAccountURI, returning the populated
// mention.
//
// Will check on the existing status if the mention
// is already there and populated; if so, existing
// mention will be returned along with `true`.
//
// Otherwise, this function will try to parse first
// the Href of the mention, and then the namestring,
// to see who it targets, and go fetch that account.
func ( d * Dereferencer ) populateMentionTarget (
ctx context . Context ,
mention * gtsmodel . Mention ,
requestUser string ,
existing , status * gtsmodel . Status ,
) (
* gtsmodel . Mention ,
bool , // True if mention already exists in the DB.
error ,
) {
// Mentions can be created using Name or Href.
// Prefer Href (TargetAccountURI), fall back to Name.
if mention . TargetAccountURI != "" {
// Look for existing mention with this URI.
// If we already have it we can return early.
existingMention , ok := existing . GetMentionByTargetURI ( mention . TargetAccountURI )
if ok && existingMention . ID != "" {
return existingMention , true , nil
2021-08-29 13:03:08 +03:00
}
2023-05-12 12:15:54 +03:00
// Ensure that mention account URI is parseable.
accountURI , err := url . Parse ( mention . TargetAccountURI )
if err != nil {
2023-10-31 14:05:17 +03:00
err = gtserror . Newf ( "invalid account uri %q: %w" , mention . TargetAccountURI , err )
return nil , false , err
2021-08-20 13:26:56 +03:00
}
2021-08-10 14:32:39 +03:00
2023-05-12 12:15:54 +03:00
// Ensure we have the account of the mention target dereferenced.
mention . TargetAccount , _ , err = d . getAccountByURI ( ctx , requestUser , accountURI )
2021-08-10 14:32:39 +03:00
if err != nil {
2023-10-31 14:05:17 +03:00
err = gtserror . Newf ( "failed to dereference account %s: %w" , accountURI , err )
return nil , false , err
}
} else {
// Href wasn't set. Find the target account using namestring.
username , domain , err := util . ExtractNamestringParts ( mention . NameString )
if err != nil {
err = gtserror . Newf ( "failed to parse namestring %s: %w" , mention . NameString , err )
return nil , false , err
}
mention . TargetAccount , _ , err = d . getAccountByUsernameDomain ( ctx , requestUser , username , domain )
if err != nil {
err = gtserror . Newf ( "failed to dereference account %s: %w" , mention . NameString , err )
return nil , false , err
}
// Look for existing mention with this URI.
mention . TargetAccountURI = mention . TargetAccount . URI
existingMention , ok := existing . GetMentionByTargetURI ( mention . TargetAccountURI )
if ok && existingMention . ID != "" {
return existingMention , true , nil
}
}
// At this point, mention.TargetAccountURI
// and mention.TargetAccount must be set.
return mention , false , nil
}
func ( d * Dereferencer ) fetchStatusMentions ( ctx context . Context , requestUser string , existing , status * gtsmodel . Status ) error {
// Allocate new slice to take the yet-to-be created mention IDs.
status . MentionIDs = make ( [ ] string , len ( status . Mentions ) )
for i := range status . Mentions {
var (
mention = status . Mentions [ i ]
alreadyExists bool
err error
)
mention , alreadyExists , err = d . populateMentionTarget (
ctx ,
mention ,
requestUser ,
existing ,
status ,
)
if err != nil {
log . Errorf ( ctx , "failed to derive mention: %v" , err )
continue
}
if alreadyExists {
// This mention was already attached
// to the status, use it and continue.
status . Mentions [ i ] = mention
status . MentionIDs [ i ] = mention . ID
2023-05-12 12:15:54 +03:00
continue
2021-08-20 13:26:56 +03:00
}
2023-10-31 14:05:17 +03:00
// This mention didn't exist yet.
2023-05-12 12:15:54 +03:00
// Generate new ID according to status creation.
2023-10-04 15:09:42 +03:00
// TODO: update this to use "edited_at" when we add
// support for edited status revision history.
2023-05-12 12:15:54 +03:00
mention . ID , err = id . NewULIDFromTime ( status . CreatedAt )
if err != nil {
2023-11-08 17:32:17 +03:00
log . Errorf ( ctx , "invalid created at date (falling back to 'now'): %v" , err )
2023-05-12 12:15:54 +03:00
mention . ID = id . NewULID ( ) // just use "now"
2021-08-10 14:32:39 +03:00
}
2023-05-12 12:15:54 +03:00
// Set known further mention details.
mention . CreatedAt = status . CreatedAt
mention . UpdatedAt = status . UpdatedAt
mention . OriginAccount = status . Account
mention . OriginAccountID = status . AccountID
mention . OriginAccountURI = status . AccountURI
mention . TargetAccountID = mention . TargetAccount . ID
mention . TargetAccountURI = mention . TargetAccount . URI
mention . TargetAccountURL = mention . TargetAccount . URL
mention . StatusID = status . ID
mention . Status = status
// Place the new mention into the database.
if err := d . state . DB . PutMention ( ctx , mention ) ; err != nil {
2023-05-28 15:08:35 +03:00
return gtserror . Newf ( "error putting mention in database: %w" , err )
2021-08-10 14:32:39 +03:00
}
2021-08-29 13:03:08 +03:00
2023-05-12 12:15:54 +03:00
// Set the *new* mention and ID.
status . Mentions [ i ] = mention
status . MentionIDs [ i ] = mention . ID
2021-08-10 14:32:39 +03:00
}
2021-08-29 13:03:08 +03:00
2023-06-22 22:46:36 +03:00
for i := 0 ; i < len ( status . MentionIDs ) ; {
2023-05-12 12:15:54 +03:00
if status . MentionIDs [ i ] == "" {
// This is a failed mention population, likely due
// to invalid incoming data / now-deleted accounts.
copy ( status . Mentions [ i : ] , status . Mentions [ i + 1 : ] )
copy ( status . MentionIDs [ i : ] , status . MentionIDs [ i + 1 : ] )
status . Mentions = status . Mentions [ : len ( status . Mentions ) - 1 ]
status . MentionIDs = status . MentionIDs [ : len ( status . MentionIDs ) - 1 ]
2023-06-22 22:46:36 +03:00
continue
2023-05-12 12:15:54 +03:00
}
2023-06-22 22:46:36 +03:00
i ++
2023-05-12 12:15:54 +03:00
}
2021-08-10 14:32:39 +03:00
2021-08-29 13:03:08 +03:00
return nil
}
2023-10-25 17:04:53 +03:00
// threadStatus assigns a thread ID to the given status where applicable.
//
// If the status replies to a parent that already has a thread ID, that
// ID is inherited. Otherwise, a new thread is created in the database
// and assigned, but only when the status mentions at least one local
// account. A status matching neither case is left without a thread ID.
//
// The only error returned is a failure to insert a new thread.
func (d *Dereferencer) threadStatus(ctx context.Context, status *gtsmodel.Status) error {
	// Easy path: a threaded parent
	// passes its thread ID down to us.
	if parent := status.InReplyTo; parent != nil && parent.ThreadID != "" {
		status.ThreadID = parent.ThreadID
		return nil
	}

	// Reports whether a mention targets a local
	// account. A nil target account is treated as
	// "not local", since it could not be deref'd.
	targetsLocal := func(mention *gtsmodel.Mention) bool {
		target := mention.TargetAccount
		return target != nil && target.IsLocal()
	}

	// Without a threaded parent, threading only
	// matters when a local account is mentioned,
	// so that account can mute the thread.
	if !slices.ContainsFunc(status.Mentions, targetsLocal) {
		return nil
	}

	// A local account is mentioned: start
	// a fresh thread and attach it to status.
	newThreadID := id.NewULID()
	newThread := &gtsmodel.Thread{ID: newThreadID}
	if err := d.state.DB.PutThread(ctx, newThread); err != nil {
		return gtserror.Newf("error inserting new thread in db: %w", err)
	}

	status.ThreadID = newThreadID
	return nil
}
2023-10-31 14:12:22 +03:00
func ( d * Dereferencer ) fetchStatusTags ( ctx context . Context , existing , status * gtsmodel . Status ) error {
2023-07-31 16:47:35 +03:00
// Allocate new slice to take the yet-to-be determined tag IDs.
status . TagIDs = make ( [ ] string , len ( status . Tags ) )
for i := range status . Tags {
2023-10-31 14:12:22 +03:00
tag := status . Tags [ i ]
// Look for tag in existing status with name.
existing , ok := existing . GetTagByName ( tag . Name )
if ok && existing . ID != "" {
status . Tags [ i ] = existing
status . TagIDs [ i ] = existing . ID
continue
}
2023-07-31 16:47:35 +03:00
2023-10-31 14:12:22 +03:00
// Look for existing tag with name in the database.
existing , err := d . state . DB . GetTagByName ( ctx , tag . Name )
2023-07-31 16:47:35 +03:00
if err != nil && ! errors . Is ( err , db . ErrNoEntries ) {
2023-10-31 14:12:22 +03:00
return gtserror . Newf ( "db error getting tag %s: %w" , tag . Name , err )
} else if existing != nil {
status . Tags [ i ] = existing
status . TagIDs [ i ] = existing . ID
2023-07-31 16:47:35 +03:00
continue
}
2023-10-31 14:12:22 +03:00
// Create new ID for tag.
tag . ID = id . NewULID ( )
2023-07-31 16:47:35 +03:00
2023-10-31 14:12:22 +03:00
// Insert this tag with new name into the database.
if err := d . state . DB . PutTag ( ctx , tag ) ; err != nil {
log . Errorf ( ctx , "db error putting tag %s: %v" , tag . Name , err )
continue
2023-07-31 16:47:35 +03:00
}
2023-10-31 14:12:22 +03:00
// Set new tag ID in slice.
2023-07-31 16:47:35 +03:00
status . TagIDs [ i ] = tag . ID
}
// Remove any tag we couldn't get or create.
for i := 0 ; i < len ( status . TagIDs ) ; {
if status . TagIDs [ i ] == "" {
// This is a failed tag population, likely due
// to some database peculiarity / race condition.
copy ( status . Tags [ i : ] , status . Tags [ i + 1 : ] )
copy ( status . TagIDs [ i : ] , status . TagIDs [ i + 1 : ] )
status . Tags = status . Tags [ : len ( status . Tags ) - 1 ]
status . TagIDs = status . TagIDs [ : len ( status . TagIDs ) - 1 ]
continue
}
i ++
}
return nil
}
2023-11-08 17:32:17 +03:00
func ( d * Dereferencer ) fetchStatusPoll ( ctx context . Context , existing , status * gtsmodel . Status ) error {
var (
// insertStatusPoll generates ID and inserts the poll attached to status into the database.
insertStatusPoll = func ( ctx context . Context , status * gtsmodel . Status ) error {
var err error
// Generate new ID for poll from the status CreatedAt.
// TODO: update this to use "edited_at" when we add
// support for edited status revision history.
status . Poll . ID , err = id . NewULIDFromTime ( status . CreatedAt )
if err != nil {
log . Errorf ( ctx , "invalid created at date (falling back to 'now'): %v" , err )
status . Poll . ID = id . NewULID ( ) // just use "now"
}
// Update the status<->poll links.
status . PollID = status . Poll . ID
status . Poll . StatusID = status . ID
status . Poll . Status = status
// Insert this latest poll into the database.
err = d . state . DB . PutPoll ( ctx , status . Poll )
if err != nil {
return gtserror . Newf ( "error putting in database: %w" , err )
}
return nil
}
// deleteStatusPoll deletes the poll with ID, and all attached votes, from the database.
deleteStatusPoll = func ( ctx context . Context , pollID string ) error {
if err := d . state . DB . DeletePollByID ( ctx , pollID ) ; err != nil {
return gtserror . Newf ( "error deleting existing poll from database: %w" , err )
}
if err := d . state . DB . DeletePollVotes ( ctx , pollID ) ; err != nil {
return gtserror . Newf ( "error deleting existing votes from database: %w" , err )
}
return nil
}
)
switch {
case existing . Poll == nil && status . Poll == nil :
// no poll before or after, nothing to do.
return nil
case existing . Poll == nil && status . Poll != nil :
// no previous poll, insert new poll!
return insertStatusPoll ( ctx , status )
2023-11-11 13:15:04 +03:00
case status . Poll == nil :
2023-11-08 17:32:17 +03:00
// existing poll has been deleted, remove this.
return deleteStatusPoll ( ctx , existing . PollID )
2023-11-11 13:15:04 +03:00
case pollChanged ( existing . Poll , status . Poll ) :
2023-11-08 17:32:17 +03:00
// poll has changed since original, delete and reinsert new.
if err := deleteStatusPoll ( ctx , existing . PollID ) ; err != nil {
return err
}
return insertStatusPoll ( ctx , status )
2023-11-11 13:15:04 +03:00
case pollUpdated ( existing . Poll , status . Poll ) :
2023-11-08 17:32:17 +03:00
// Since we last saw it, the poll has updated!
// Whether that be stats, or close time.
poll := existing . Poll
2023-11-11 13:15:04 +03:00
poll . Closing = pollJustClosed ( existing . Poll , status . Poll )
2023-11-08 17:32:17 +03:00
poll . ClosedAt = status . Poll . ClosedAt
poll . Voters = status . Poll . Voters
poll . Votes = status . Poll . Votes
// Update poll model in the database (specifically only the possible changed columns).
if err := d . state . DB . UpdatePoll ( ctx , poll , "closed_at" , "voters" , "votes" ) ; err != nil {
return gtserror . Newf ( "error updating poll: %w" , err )
}
// Update poll on status.
status . PollID = poll . ID
status . Poll = poll
return nil
default :
// latest and existing
// polls are up to date.
poll := existing . Poll
status . PollID = poll . ID
status . Poll = poll
return nil
}
}
2023-10-23 12:58:13 +03:00
func ( d * Dereferencer ) fetchStatusAttachments ( ctx context . Context , tsport transport . Transport , existing , status * gtsmodel . Status ) error {
2023-05-12 12:15:54 +03:00
// Allocate new slice to take the yet-to-be fetched attachment IDs.
status . AttachmentIDs = make ( [ ] string , len ( status . Attachments ) )
for i := range status . Attachments {
2023-10-31 14:12:22 +03:00
attachment := status . Attachments [ i ]
2023-05-12 12:15:54 +03:00
2023-11-10 21:29:26 +03:00
// Look for existing media attachment with remote URL first.
2023-10-31 14:12:22 +03:00
existing , ok := existing . GetAttachmentByRemoteURL ( attachment . RemoteURL )
2023-06-22 22:46:36 +03:00
if ok && existing . ID != "" && * existing . Cached {
2023-05-12 12:15:54 +03:00
status . Attachments [ i ] = existing
status . AttachmentIDs [ i ] = existing . ID
continue
}
// Ensure a valid media attachment remote URL.
2023-10-31 14:12:22 +03:00
remoteURL , err := url . Parse ( attachment . RemoteURL )
2023-05-12 12:15:54 +03:00
if err != nil {
2023-10-31 14:12:22 +03:00
log . Errorf ( ctx , "invalid remote media url %q: %v" , attachment . RemoteURL , err )
2023-05-12 12:15:54 +03:00
continue
}
2023-11-10 21:29:26 +03:00
data := func ( ctx context . Context ) ( io . ReadCloser , int64 , error ) {
2023-05-12 12:15:54 +03:00
return tsport . DereferenceMedia ( ctx , remoteURL )
2023-11-10 21:29:26 +03:00
}
ai := & media . AdditionalMediaInfo {
2023-05-12 12:15:54 +03:00
StatusID : & status . ID ,
2023-10-31 14:12:22 +03:00
RemoteURL : & attachment . RemoteURL ,
Description : & attachment . Description ,
Blurhash : & attachment . Blurhash ,
2022-01-08 19:17:01 +03:00
}
2023-11-10 21:29:26 +03:00
// Start pre-processing remote media at remote URL.
processing := d . mediaManager . PreProcessMedia ( data , status . AccountID , ai )
2023-05-12 12:15:54 +03:00
// Force attachment loading *right now*.
2023-10-31 14:12:22 +03:00
attachment , err = processing . LoadAttachment ( ctx )
2022-01-08 19:17:01 +03:00
if err != nil {
2023-11-10 21:29:26 +03:00
if attachment == nil {
// Totally failed to load;
// bail on this attachment.
log . Errorf ( ctx , "error loading attachment: %v" , err )
continue
}
// Partially loaded. Keep as
// placeholder and try again later.
log . Warnf ( ctx , "partially loaded attachment: %v" , err )
2021-08-29 13:03:08 +03:00
}
2023-05-12 12:15:54 +03:00
// Set the *new* attachment and ID.
2023-10-31 14:12:22 +03:00
status . Attachments [ i ] = attachment
status . AttachmentIDs [ i ] = attachment . ID
2021-08-29 13:03:08 +03:00
}
2023-06-22 22:46:36 +03:00
for i := 0 ; i < len ( status . AttachmentIDs ) ; {
2023-05-12 12:15:54 +03:00
if status . AttachmentIDs [ i ] == "" {
2023-11-10 21:29:26 +03:00
// Remove totally failed attachment populations
2023-05-12 12:15:54 +03:00
copy ( status . Attachments [ i : ] , status . Attachments [ i + 1 : ] )
copy ( status . AttachmentIDs [ i : ] , status . AttachmentIDs [ i + 1 : ] )
status . Attachments = status . Attachments [ : len ( status . Attachments ) - 1 ]
status . AttachmentIDs = status . AttachmentIDs [ : len ( status . AttachmentIDs ) - 1 ]
2023-06-22 22:46:36 +03:00
continue
2023-05-12 12:15:54 +03:00
}
2023-06-22 22:46:36 +03:00
i ++
2023-05-12 12:15:54 +03:00
}
2021-08-29 13:03:08 +03:00
return nil
}
2023-10-23 12:58:13 +03:00
func ( d * Dereferencer ) fetchStatusEmojis ( ctx context . Context , requestUser string , status * gtsmodel . Status ) error {
2023-05-12 12:15:54 +03:00
// Fetch the full-fleshed-out emoji objects for our status.
emojis , err := d . populateEmojis ( ctx , status . Emojis , requestUser )
2022-09-26 12:56:01 +03:00
if err != nil {
2023-05-28 15:08:35 +03:00
return gtserror . Newf ( "failed to populate emojis: %w" , err )
2022-09-26 12:56:01 +03:00
}
2022-09-12 14:03:23 +03:00
2023-05-12 12:15:54 +03:00
// Iterate over and get their IDs.
2022-09-26 12:56:01 +03:00
emojiIDs := make ( [ ] string , 0 , len ( emojis ) )
for _ , e := range emojis {
emojiIDs = append ( emojiIDs , e . ID )
2022-09-12 14:03:23 +03:00
}
2023-05-12 12:15:54 +03:00
// Set known emoji details.
2022-09-26 12:56:01 +03:00
status . Emojis = emojis
2022-09-12 14:03:23 +03:00
status . EmojiIDs = emojiIDs
2021-08-29 13:03:08 +03:00
2021-08-10 14:32:39 +03:00
return nil
}