624 lines
17 KiB
PHP
624 lines
17 KiB
PHP
<?php
|
|
/**
|
|
* Remote Posts collection file.
|
|
*
|
|
* @package Activitypub
|
|
*/
|
|
|
|
namespace Activitypub\Collection;
|
|
|
|
use Activitypub\Emoji;
|
|
use Activitypub\Sanitize;
|
|
|
|
use function Activitypub\generate_post_summary;
|
|
use function Activitypub\object_to_uri;
|
|
use function Activitypub\process_remote_media;
|
|
|
|
/**
|
|
* Remote Posts collection.
|
|
*
|
|
* Provides methods to retrieve, create, update, and manage remote
|
|
* ActivityPub posts (articles, notes, media, etc.) received via
|
|
* Server-to-Server (S2S) federation.
|
|
*
|
|
* @see Posts for local posts created via Client-to-Server (C2S) outbox.
|
|
*/
|
|
class Remote_Posts {
|
|
/**
|
|
* The post type for the posts.
|
|
*
|
|
* @var string
|
|
*/
|
|
const POST_TYPE = 'ap_post';
|
|
|
|
/**
|
|
* Maximum number of remote post items to keep.
|
|
*
|
|
* @var int
|
|
*/
|
|
const MAX_ITEMS = 5000;
|
|
|
|
/**
|
|
* Number of items to process per batch during purge.
|
|
*
|
|
* @var int
|
|
*/
|
|
const PURGE_BATCH_SIZE = 100;
|
|
|
|
/**
|
|
* Maximum seconds a purge run may take before yielding.
|
|
*
|
|
* @var int
|
|
*/
|
|
const PURGE_TIMEOUT = 30;
|
|
|
|
/**
|
|
* Add an object to the collection.
|
|
*
|
|
* @param array $activity The activity object data.
|
|
* @param int|int[] $recipients The id(s) of the local blog-user(s).
|
|
*
|
|
* @return \WP_Post|\WP_Error The object post or WP_Error on failure.
|
|
*/
|
|
public static function add( $activity, $recipients ) {
|
|
$recipients = (array) $recipients;
|
|
$activity_object = $activity['object'];
|
|
|
|
$existing = self::get_by_guid( $activity_object['id'] );
|
|
// If post exists, call update instead.
|
|
if ( ! \is_wp_error( $existing ) ) {
|
|
return self::update( $activity, $recipients );
|
|
}
|
|
|
|
// Post doesn't exist, create new post.
|
|
$actor = Remote_Actors::fetch_by_uri( object_to_uri( $activity_object['attributedTo'] ) );
|
|
|
|
if ( \is_wp_error( $actor ) ) {
|
|
return $actor;
|
|
}
|
|
|
|
$post_array = self::activity_to_post( $activity_object );
|
|
$post_id = \wp_insert_post( $post_array, true );
|
|
|
|
if ( \is_wp_error( $post_id ) ) {
|
|
return $post_id;
|
|
}
|
|
|
|
\add_post_meta( $post_id, '_activitypub_remote_actor_id', $actor->ID );
|
|
|
|
// Add recipients as separate meta entries after post is created.
|
|
foreach ( $recipients as $user_id ) {
|
|
self::add_recipient( $post_id, $user_id );
|
|
}
|
|
|
|
self::add_taxonomies( $post_id, $activity_object );
|
|
|
|
return \get_post( $post_id );
|
|
}
|
|
|
|
/**
|
|
* Get an object from the collection.
|
|
*
|
|
* @param int $id The object ID.
|
|
*
|
|
* @return \WP_Post|null The post object or null on failure.
|
|
*/
|
|
public static function get( $id ) {
|
|
return \get_post( $id );
|
|
}
|
|
|
|
/**
|
|
* Get an object by its GUID.
|
|
*
|
|
* @param string $guid The object GUID.
|
|
*
|
|
* @return \WP_Post|\WP_Error The object post or WP_Error on failure.
|
|
*/
|
|
public static function get_by_guid( $guid ) {
|
|
global $wpdb;
|
|
// phpcs:ignore WordPress.DB.DirectDatabaseQuery.DirectQuery, WordPress.DB.DirectDatabaseQuery.NoCaching
|
|
$post_id = $wpdb->get_var(
|
|
$wpdb->prepare(
|
|
"SELECT ID FROM $wpdb->posts WHERE guid=%s AND post_type=%s",
|
|
\esc_url( $guid ),
|
|
self::POST_TYPE
|
|
)
|
|
);
|
|
|
|
if ( ! $post_id ) {
|
|
return new \WP_Error(
|
|
'activitypub_post_not_found',
|
|
\__( 'Post not found', 'activitypub' ),
|
|
array( 'status' => 404 )
|
|
);
|
|
}
|
|
|
|
return \get_post( $post_id );
|
|
}
|
|
|
|
/**
|
|
* Update an object in the collection.
|
|
*
|
|
* @param array $activity The activity object data.
|
|
* @param int|int[] $recipients The id(s) of the local blog-user(s).
|
|
*
|
|
* @return \WP_Post|\WP_Error The updated object post or WP_Error on failure.
|
|
*/
|
|
public static function update( $activity, $recipients ) {
|
|
$recipients = (array) $recipients;
|
|
|
|
$post = self::get_by_guid( $activity['object']['id'] );
|
|
if ( \is_wp_error( $post ) ) {
|
|
return $post;
|
|
}
|
|
|
|
$post_array = self::activity_to_post( $activity['object'] );
|
|
$post_array['ID'] = $post->ID;
|
|
$post_id = \wp_update_post( $post_array, true );
|
|
|
|
if ( \is_wp_error( $post_id ) ) {
|
|
return $post_id;
|
|
}
|
|
|
|
// Add new recipients using add_recipient (handles deduplication).
|
|
foreach ( $recipients as $user_id ) {
|
|
self::add_recipient( $post_id, $user_id );
|
|
}
|
|
|
|
self::add_taxonomies( $post_id, $activity['object'] );
|
|
|
|
return \get_post( $post_id );
|
|
}
|
|
|
|
/**
|
|
* Delete an object from the collection.
|
|
*
|
|
* @param int $id The object ID.
|
|
*
|
|
* @return \WP_Post|false|null Post data on success, false or null on failure.
|
|
*/
|
|
public static function delete( $id ) {
|
|
return \wp_delete_post( $id, true );
|
|
}
|
|
|
|
/**
|
|
* Delete an object from the collection by its GUID.
|
|
*
|
|
* @param string $guid The object GUID.
|
|
*
|
|
* @return \WP_Post|\WP_Error|false|null Post data on success, false or null on failure, or WP_Error if no post to delete.
|
|
*/
|
|
public static function delete_by_guid( $guid ) {
|
|
$post = self::get_by_guid( $guid );
|
|
if ( \is_wp_error( $post ) ) {
|
|
return $post;
|
|
}
|
|
|
|
return self::delete( $post->ID );
|
|
}
|
|
|
|
/**
|
|
* Extract hashtag names from ActivityPub tag array.
|
|
*
|
|
* @param array $tags Array of ActivityPub tags.
|
|
*
|
|
* @return array Array of normalized hashtag names (without # prefix, trimmed, sanitized).
|
|
*/
|
|
public static function extract_hashtags( $tags ) {
|
|
$hashtags = array();
|
|
|
|
if ( empty( $tags ) || ! \is_array( $tags ) ) {
|
|
return $hashtags;
|
|
}
|
|
|
|
foreach ( $tags as $tag ) {
|
|
if ( isset( $tag['type'] ) && 'Hashtag' === $tag['type'] && isset( $tag['name'] ) ) {
|
|
// Strip # prefix, trim whitespace, and sanitize.
|
|
$normalized = \trim( \ltrim( $tag['name'], '#' ) );
|
|
$normalized = \wp_strip_all_tags( $normalized );
|
|
|
|
if ( ! empty( $normalized ) ) {
|
|
$hashtags[] = $normalized;
|
|
}
|
|
}
|
|
}
|
|
|
|
return $hashtags;
|
|
}
|
|
|
|
/**
|
|
* Remove hashtags from content.
|
|
*
|
|
* Removes hashtags that appear at the end of the content.
|
|
* Handles both plain text and HTML content, including hashtags within anchor tags.
|
|
*
|
|
* @param string $content The content to process.
|
|
* @param array $tags Array of tag objects from activity (with 'type' and 'name' keys).
|
|
*
|
|
* @return string The content with trailing hashtags removed.
|
|
*/
|
|
public static function remove_hashtags( $content, $tags ) {
|
|
if ( empty( $content ) || empty( $tags ) || ! \is_array( $tags ) ) {
|
|
return $content;
|
|
}
|
|
|
|
// Extract and normalize hashtags from tag objects.
|
|
$normalized_tags = self::extract_hashtags( $tags );
|
|
|
|
if ( empty( $normalized_tags ) ) {
|
|
return $content;
|
|
}
|
|
|
|
// Build pattern to match trailing hashtags (at end of content or before closing tags).
|
|
$tag_patterns = array();
|
|
foreach ( $normalized_tags as $tag ) {
|
|
$escaped_tag = \preg_quote( $tag, '/' );
|
|
$tag_patterns[] = '(?:<a[^>]*>\s*)?#' . $escaped_tag . '(?=\s|<|$)(?:\s*<\/a>)?';
|
|
}
|
|
|
|
/*
|
|
* Pattern explanation:
|
|
* Match one or more hashtags (plain or in anchor tags) at the end of content.
|
|
* The pattern matches trailing hashtags before closing HTML tags or at end of string.
|
|
*/
|
|
$pattern = '/(?:\s+(?:' . \implode( '|', $tag_patterns ) . '))+(?=\s*(?:<\/[^>]+>)*\s*$)/i';
|
|
$content = \preg_replace( $pattern, '', $content );
|
|
|
|
// Clean up any extra whitespace at end of paragraphs.
|
|
$content = \preg_replace( '/<p>\s*<\/p>/', '', $content );
|
|
$content = \preg_replace( '/\s+<\/p>/', '</p>', $content );
|
|
$content = \preg_replace( '/\s+<\/strong>/', '</strong>', $content );
|
|
|
|
return \trim( $content );
|
|
}
|
|
|
|
/**
|
|
* Convert an activity to a post array.
|
|
*
|
|
* @param array $activity The activity array.
|
|
*
|
|
* @return array|\WP_Error The post array or WP_Error on failure.
|
|
*/
|
|
private static function activity_to_post( $activity ) {
|
|
if ( ! \is_array( $activity ) ) {
|
|
return new \WP_Error( 'invalid_activity', \__( 'Invalid activity format', 'activitypub' ) );
|
|
}
|
|
|
|
$gm_date = \gmdate( 'Y-m-d H:i:s', \strtotime( $activity['published'] ?? 'now' ) );
|
|
|
|
// Sanitize content and remove hashtags.
|
|
$content = isset( $activity['content'] ) ? Sanitize::content( $activity['content'] ) : '';
|
|
$content = self::remove_hashtags( $content, $activity['tag'] ?? array() );
|
|
$content = Emoji::wrap_in_content( $content, $activity );
|
|
|
|
// Process remote media: wrap inline images and append attachments.
|
|
$attachments = self::extract_attachments( $activity );
|
|
$content = process_remote_media( $content, $attachments );
|
|
|
|
return array(
|
|
'post_title' => isset( $activity['name'] ) ? \wp_strip_all_tags( $activity['name'] ) : '',
|
|
'post_content' => $content,
|
|
'post_excerpt' => isset( $activity['summary'] ) ? \wp_strip_all_tags( $activity['summary'] ) : generate_post_summary( $activity['content'] ?? '' ),
|
|
'post_status' => 'publish',
|
|
'post_type' => self::POST_TYPE,
|
|
'post_date_gmt' => $gm_date,
|
|
'post_date' => \get_date_from_gmt( $gm_date ),
|
|
'guid' => isset( $activity['id'] ) ? \esc_url_raw( $activity['id'] ) : '',
|
|
);
|
|
}
|
|
|
|
/**
|
|
* Add taxonomies to the object post.
|
|
*
|
|
* @param int $post_id The post ID.
|
|
* @param array $activity_object The activity object data.
|
|
*/
|
|
private static function add_taxonomies( $post_id, $activity_object ) {
|
|
// Save Object Type as Taxonomy item.
|
|
\wp_set_post_terms( $post_id, array( $activity_object['type'] ), 'ap_object_type' );
|
|
|
|
// Save the Hashtags as Taxonomy items.
|
|
$tags = self::extract_hashtags( $activity_object['tag'] ?? array() );
|
|
|
|
\wp_set_post_terms( $post_id, $tags, 'ap_tag' );
|
|
}
|
|
|
|
/**
|
|
* Extract media attachments from an activity object.
|
|
*
|
|
* Extracts attachments with URL, alt text, and media type for appending to content.
|
|
*
|
|
* @param array $activity_object The activity object data.
|
|
*
|
|
* @return array Array of attachments with 'url', 'alt', and 'type' keys.
|
|
*/
|
|
private static function extract_attachments( $activity_object ) {
|
|
if ( empty( $activity_object['attachment'] ) || ! \is_array( $activity_object['attachment'] ) ) {
|
|
return array();
|
|
}
|
|
|
|
$attachments = array();
|
|
foreach ( $activity_object['attachment'] as $attachment ) {
|
|
if ( \is_object( $attachment ) ) {
|
|
$attachment = \get_object_vars( $attachment );
|
|
}
|
|
|
|
if ( empty( $attachment['url'] ) ) {
|
|
continue;
|
|
}
|
|
|
|
$mime_type = $attachment['mediaType'] ?? '';
|
|
|
|
if ( \str_starts_with( $mime_type, 'video/' ) ) {
|
|
$type = 'video';
|
|
} elseif ( \str_starts_with( $mime_type, 'audio/' ) ) {
|
|
$type = 'audio';
|
|
} else {
|
|
$type = 'image';
|
|
}
|
|
|
|
$attachments[] = array(
|
|
'url' => $attachment['url'],
|
|
'alt' => $attachment['name'] ?? '',
|
|
'type' => $type,
|
|
);
|
|
}
|
|
|
|
return $attachments;
|
|
}
|
|
|
|
/**
|
|
* Get posts by remote actor.
|
|
*
|
|
* @param string $actor The remote actor URI.
|
|
*
|
|
* @return array Array of WP_Post objects.
|
|
*/
|
|
public static function get_by_remote_actor( $actor ) {
|
|
$remote_actor = Remote_Actors::fetch_by_uri( $actor );
|
|
|
|
if ( \is_wp_error( $remote_actor ) ) {
|
|
return array();
|
|
}
|
|
|
|
return self::get_by_remote_actor_id( $remote_actor->ID );
|
|
}
|
|
|
|
/**
|
|
* Get posts by remote actor ID.
|
|
*
|
|
* @param int $actor_id The remote actor post ID.
|
|
*
|
|
* @return array Array of WP_Post objects.
|
|
*/
|
|
public static function get_by_remote_actor_id( $actor_id ) {
|
|
$query = new \WP_Query(
|
|
array(
|
|
'post_type' => self::POST_TYPE,
|
|
'posts_per_page' => -1,
|
|
// phpcs:ignore WordPress.DB.SlowDBQuery.slow_db_query_meta_key
|
|
'meta_key' => '_activitypub_remote_actor_id',
|
|
// phpcs:ignore WordPress.DB.SlowDBQuery.slow_db_query_meta_value
|
|
'meta_value' => $actor_id,
|
|
)
|
|
);
|
|
|
|
return $query->posts;
|
|
}
|
|
|
|
/**
|
|
* Get all recipients for a post.
|
|
*
|
|
* @param int $post_id The post ID.
|
|
*
|
|
* @return int[] Array of user IDs who are recipients.
|
|
*/
|
|
public static function get_recipients( $post_id ) {
|
|
// Get all meta values with key '_activitypub_user_id' (single => false).
|
|
$recipients = \get_post_meta( $post_id, '_activitypub_user_id', false );
|
|
$recipients = \array_map( 'intval', $recipients );
|
|
|
|
return $recipients;
|
|
}
|
|
|
|
/**
|
|
* Check if a user is a recipient of a post.
|
|
*
|
|
* @param int $post_id The post ID.
|
|
* @param int $user_id The user ID to check.
|
|
*
|
|
* @return bool True if user is a recipient, false otherwise.
|
|
*/
|
|
public static function has_recipient( $post_id, $user_id ) {
|
|
$recipients = self::get_recipients( $post_id );
|
|
|
|
return \in_array( (int) $user_id, $recipients, true );
|
|
}
|
|
|
|
/**
|
|
* Add a recipient to an existing post.
|
|
*
|
|
* @param int $post_id The post ID.
|
|
* @param int $user_id The user ID to add.
|
|
*
|
|
* @return bool True on success, false on failure.
|
|
*/
|
|
public static function add_recipient( $post_id, $user_id ) {
|
|
$user_id = (int) $user_id;
|
|
// Allow 0 for blog user, but reject negative values.
|
|
if ( $user_id < 0 ) {
|
|
return false;
|
|
}
|
|
|
|
// Check if already a recipient.
|
|
if ( self::has_recipient( $post_id, $user_id ) ) {
|
|
return true;
|
|
}
|
|
|
|
// Add new recipient as separate meta entry.
|
|
return (bool) \add_post_meta( $post_id, '_activitypub_user_id', $user_id, false );
|
|
}
|
|
|
|
/**
|
|
* Add multiple recipients to an existing post.
|
|
*
|
|
* @param int $post_id The post ID.
|
|
* @param int[] $user_ids The user ID or array of user IDs to add.
|
|
*/
|
|
public static function add_recipients( $post_id, $user_ids ) {
|
|
foreach ( $user_ids as $user_id ) {
|
|
self::add_recipient( $post_id, $user_id );
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Remove a recipient from a post.
|
|
*
|
|
* @param int $post_id The post ID.
|
|
* @param int $user_id The user ID to remove.
|
|
*
|
|
* @return bool True on success, false on failure.
|
|
*/
|
|
public static function remove_recipient( $post_id, $user_id ) {
|
|
$user_id = (int) $user_id;
|
|
|
|
// Allow 0 for blog user, but reject negative values.
|
|
if ( $user_id < 0 ) {
|
|
return false;
|
|
}
|
|
|
|
// Delete the specific meta entry with this value.
|
|
return \delete_post_meta( $post_id, '_activitypub_user_id', $user_id );
|
|
}
|
|
|
|
/**
|
|
* Delete all posts.
|
|
*
|
|
* Used during plugin uninstall to clean up all remote posts.
|
|
*
|
|
* @return int The number of posts deleted.
|
|
*/
|
|
public static function delete_all() {
|
|
$post_ids = \get_posts(
|
|
array(
|
|
'post_type' => self::POST_TYPE,
|
|
'post_status' => array( 'any', 'trash', 'auto-draft' ),
|
|
'fields' => 'ids',
|
|
'numberposts' => -1,
|
|
)
|
|
);
|
|
|
|
foreach ( $post_ids as $post_id ) {
|
|
\wp_delete_post( $post_id, true );
|
|
}
|
|
|
|
return count( $post_ids );
|
|
}
|
|
|
|
/**
|
|
* Purge old remote posts.
|
|
*
|
|
* Deletes remote posts older than the specified number of days,
|
|
* but preserves posts that have comments from local users
|
|
* as these indicate meaningful local interactions.
|
|
*
|
|
* @param int $days Number of days to keep items. Items older than this will be deleted.
|
|
*
|
|
* @return int The number of items deleted.
|
|
*/
|
|
public static function purge( $days ) {
|
|
if ( $days <= 0 ) {
|
|
return 0;
|
|
}
|
|
|
|
$counts = \wp_count_posts( self::POST_TYPE );
|
|
$total = 0;
|
|
foreach ( $counts as $count ) {
|
|
$total += (int) $count;
|
|
}
|
|
|
|
if ( $total <= 200 ) {
|
|
return 0;
|
|
}
|
|
|
|
global $wpdb;
|
|
|
|
$deleted = 0;
|
|
$cutoff = \gmdate( 'Y-m-d', \time() - ( $days * DAY_IN_SECONDS ) );
|
|
$start_time = \time();
|
|
$exclude = array();
|
|
|
|
// If total exceeds the hard cap, drop the date filter to purge oldest items first.
|
|
$overflow = $total > self::MAX_ITEMS;
|
|
$date_query = array(
|
|
array(
|
|
'before' => $cutoff,
|
|
),
|
|
);
|
|
|
|
$query_args = array(
|
|
'post_type' => self::POST_TYPE,
|
|
'post_status' => 'any',
|
|
'fields' => 'ids',
|
|
'numberposts' => self::PURGE_BATCH_SIZE,
|
|
'orderby' => 'date',
|
|
'order' => 'ASC',
|
|
);
|
|
|
|
if ( ! $overflow ) {
|
|
$query_args['date_query'] = $date_query;
|
|
}
|
|
|
|
do {
|
|
$query_args['exclude'] = $exclude;
|
|
$post_ids = \get_posts( $query_args );
|
|
|
|
if ( empty( $post_ids ) ) {
|
|
break;
|
|
}
|
|
|
|
// Batch-fetch post IDs that have local user comments (single query per batch).
|
|
$placeholders = \implode( ',', \array_fill( 0, \count( $post_ids ), '%d' ) );
|
|
|
|
// phpcs:ignore WordPress.DB.DirectDatabaseQuery
|
|
$commented_post_ids = $wpdb->get_col(
|
|
// phpcs:ignore WordPress.DB.PreparedSQL.InterpolatedNotPrepared, WordPress.DB.PreparedSQLPlaceholders
|
|
$wpdb->prepare( "SELECT DISTINCT comment_post_ID FROM $wpdb->comments WHERE comment_post_ID IN ($placeholders) AND user_id > 0", $post_ids )
|
|
);
|
|
$commented_post_ids = \array_flip( $commented_post_ids );
|
|
|
|
foreach ( $post_ids as $post_id ) {
|
|
/**
|
|
* Filter whether to preserve a specific ap_post from being purged.
|
|
*
|
|
* @param bool $preserve Whether to preserve this post. Default false.
|
|
* @param int $post_id The ap_post ID being considered for deletion.
|
|
*
|
|
* @return bool Whether to preserve this post from deletion.
|
|
*/
|
|
if ( \apply_filters( 'activitypub_preserve_ap_post', false, $post_id ) ) {
|
|
$exclude[] = $post_id;
|
|
continue;
|
|
}
|
|
|
|
// Preserve posts with comments from local users.
|
|
if ( isset( $commented_post_ids[ $post_id ] ) ) {
|
|
$exclude[] = $post_id;
|
|
continue;
|
|
}
|
|
|
|
\wp_delete_post( $post_id, true );
|
|
++$deleted;
|
|
}
|
|
|
|
// Once we're back under the cap, re-apply the date filter.
|
|
if ( $overflow && ( $total - $deleted ) <= self::MAX_ITEMS ) {
|
|
$overflow = false;
|
|
$query_args['date_query'] = $date_query;
|
|
}
|
|
} while ( ! empty( $post_ids ) && ( \time() - $start_time ) < self::PURGE_TIMEOUT );
|
|
|
|
return $deleted;
|
|
}
|
|
}
|