Add force pull functionality and improve post content handling

This commit is contained in:
Matt Batchelder
2026-02-23 19:35:33 -05:00
parent 3b51382797
commit cdf176e224
5 changed files with 165 additions and 23 deletions

View File

@@ -11,6 +11,93 @@ if ( ! defined( 'ABSPATH' ) ) exit;
// ─── Helpers ──────────────────────────────────────────────────────────────────
/**
 * Insert or update a post while writing post_content DIRECTLY to the DB.
 *
 * Every code-path in wp_insert_post / wp_update_post runs the content through
 * sanitize_post_field() → apply_filters('pre_post_content') and
 * apply_filters('content_save_pre'), both of which have wp_kses_post
 * callbacks that turn & (inside Gutenberg block JSON) into &amp;.
 * kses_remove_filters() only unhooks content_save_pre, NOT pre_post_content,
 * so the ampersand corruption survived even with those wrappers.
 *
 * This helper lets WP create/update every other field normally (title, slug,
 * status, dates, author …) with an empty content placeholder, then immediately
 * overwrites post_content in the DB directly — no filters, no escaping beyond
 * the $wpdb placeholder.
 *
 * When the caller does not supply a 'post_content' key, the content is left
 * entirely to WordPress: no placeholder is injected and no direct write is
 * made, so an update that only changes other fields keeps the stored content.
 *
 * NOTE(review): hooks fired inside wp_insert_post / wp_update_post presumably
 * observe the empty placeholder rather than the final content — confirm that
 * no listener depends on post_content at that point.
 *
 * @param array $post_arr Same shape as wp_insert_post / wp_update_post.
 * @return int|WP_Error Post ID on success, WP_Error on failure (including a
 *                      failed direct write of post_content).
 */
function oribi_sync_save_post( array $post_arr ) {
    global $wpdb;

    // A null value is treated like an explicitly empty content string.
    $has_content = array_key_exists( 'post_content', $post_arr );
    $content     = $has_content ? (string) $post_arr['post_content'] : '';

    if ( $has_content ) {
        // Keep the real content away from WP's save filters.
        $post_arr['post_content'] = '';
    }

    if ( ! empty( $post_arr['ID'] ) ) {
        $post_id = wp_update_post( $post_arr, true );
    } else {
        $post_id = wp_insert_post( $post_arr, true );
    }

    if ( is_wp_error( $post_id ) ) {
        return $post_id;
    }

    if ( ! $has_content ) {
        // Nothing to overwrite; WP already stored (or preserved) the content.
        return $post_id;
    }

    $written = $wpdb->update(
        $wpdb->posts,
        [ 'post_content' => $content ],
        [ 'ID' => (int) $post_id ],
        [ '%s' ],
        [ '%d' ]
    );

    // The post row now holds the empty placeholder, so the cache is stale
    // whether or not the direct write succeeded.
    clean_post_cache( (int) $post_id );

    if ( false === $written ) {
        // Surface the failure instead of reporting success for a post whose
        // content is still the empty placeholder.
        return new WP_Error(
            'oribi_sync_content_write_failed',
            'Could not write post_content for post ' . (int) $post_id . ': ' . $wpdb->last_error
        );
    }

    return $post_id;
}
/**
 * Clean previously-corrupted Gutenberg block content.
 *
 * Old syncs ran content through wp_kses_post, which HTML-entity-encoded `&`
 * inside JSON attributes to `&amp;`. Once the ampersand itself was stored as
 * the JSON escape `\u0026`, the result was `\u0026amp;` instead of just
 * `\u0026` (and likewise `\u0026lt;`, `\u0026gt;`, `\u0026quot;`,
 * `\u0026#039;`).
 *
 * This function rewrites those double-encoded forms, and the plain hex
 * escapes `\u0026`, `\u003C`, `\u003E`, `\u0022`, `\u0027`, to the character
 * they stand for:
 *
 *  - `&`, `<`, `>` and `'` become literals.
 *  - A double quote becomes `\"` (backslash + quote), never a bare `"`: a
 *    bare quote would terminate the surrounding JSON string and make the
 *    block attributes unparseable.
 *  - An escaped backslash (`\\`) is consumed as a unit and left untouched, so
 *    `\\u0026` (a literal backslash followed by the text "u0026") is not
 *    mistaken for an escape.
 *
 * Matching is case-sensitive and applies to the whole string, not only to
 * block comments.
 *
 * @param string $content Gutenberg block HTML.
 * @return string Cleaned block HTML; the input unchanged if the regex engine
 *                reports an error.
 */
function oribi_sync_clean_block_content( string $content ): string {
    // Keys are the exact matched text (single-quoted: backslashes are literal).
    static $literals = [
        '\u0026amp;'  => '&',
        '\u0026lt;'   => '<',
        '\u0026gt;'   => '>',
        '\u0026quot;' => '\"',
        '\u0026#039;' => "'",
        '\u0026'      => '&',
        '\u003C'      => '<',
        '\u003E'      => '>',
        '\u0022'      => '\"',
        '\u0027'      => "'",
    ];

    // First alternative swallows an escaped backslash pair so the text after
    // it is never read as the start of a \uXXXX escape. The second alternative
    // greedily takes the double-encoded entity tail when present.
    $pattern = '/\\\\\\\\|\\\\u00(?:26(?:amp;|lt;|gt;|quot;|#039;)?|3C|3E|22|27)/';

    $cleaned = preg_replace_callback(
        $pattern,
        static function ( array $match ) use ( $literals ): string {
            // Escaped backslash pairs are not in the table and pass through.
            return $literals[ $match[0] ] ?? $match[0];
        },
        $content
    );

    // preg_replace_callback() yields null on a PCRE error; keep the input then.
    return null === $cleaned ? $content : $cleaned;
}
/**
* Strip a case-insensitive directory prefix from a file path.
*
@@ -29,14 +116,26 @@ function oribi_sync_strip_prefix( string $path, string $prefix ): string {
/** Generate a self-closing block comment (standalone or child blocks). */
if ( ! function_exists( 'oribi_b' ) ) {
    /**
     * Build `<!-- wp:oribi/NAME {json} /-->` for a block with no inner content.
     *
     * @param string $name  Block name without the `oribi/` namespace prefix.
     * @param array  $attrs Block attributes, serialised as JSON.
     * @return string The block comment markup.
     */
    function oribi_b( $name, $attrs = [] ) {
        // wp_json_encode() may return false on failure; the cast keeps the
        // output a string (an empty attribute section) in that case.
        $json = (string) wp_json_encode( $attrs, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES );

        // Normalise any hex-escaped ASCII punctuation (\u0026, \u003C, …) to
        // the form the sync stores, using the same cleaner that is applied to
        // pulled content so generated and stored markup cannot drift apart.
        $json = oribi_sync_clean_block_content( $json );

        return '<!-- wp:oribi/' . $name . ' ' . $json . ' /-->';
    }
}
/** Generate an opening tag for a parent block comment. */
if ( ! function_exists( 'oribi_b_open' ) ) {
    /**
     * Build `<!-- wp:oribi/NAME {json} -->`, omitting the JSON section when
     * there are no attributes.
     *
     * @param string $name  Block name without the `oribi/` namespace prefix.
     * @param array  $attrs Block attributes, serialised as JSON when non-empty.
     * @return string The opening block comment markup.
     */
    function oribi_b_open( $name, $attrs = [] ) {
        $json = '';

        if ( ! empty( $attrs ) ) {
            // wp_json_encode() may return false on failure; cast to string.
            $encoded = (string) wp_json_encode( $attrs, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES );

            // Same normalisation of hex-escaped ASCII punctuation that the
            // sync applies to pulled content.
            $json = ' ' . oribi_sync_clean_block_content( $encoded );
        }

        return '<!-- wp:oribi/' . $name . $json . ' -->';
    }
}
@@ -100,10 +199,11 @@ function oribi_sync_execute_php( string $php_source, string $slug ) {
* Run the full page sync.
*
* @param bool $dry_run If true, returns what would happen without making changes.
* @param bool $force If true, bypasses SHA-based change detection and re-pulls all files.
*
* @return array{ok: bool, created: string[], updated: string[], trashed: string[], skipped: string[], errors: string[]}
*/
function oribi_sync_run( bool $dry_run = false ): array {
function oribi_sync_run( bool $dry_run = false, bool $force = false ): array {
$result = [
'ok' => true,
'created' => [],
@@ -177,7 +277,7 @@ function oribi_sync_run( bool $dry_run = false ): array {
$git_sha = $entry['sha'] ?? '';
$stored_git_sha = $existing ? get_post_meta( $existing->ID, '_oribi_sync_git_sha', true ) : '';
if ( $existing && ! empty( $git_sha ) && $git_sha === $stored_git_sha ) {
if ( ! $force && $existing && ! empty( $git_sha ) && $git_sha === $stored_git_sha ) {
$result['skipped'][] = $slug . ' (unchanged)';
if ( ! $dry_run ) {
update_post_meta( $existing->ID, '_oribi_sync_last_run', current_time( 'mysql' ) );
@@ -209,6 +309,9 @@ function oribi_sync_run( bool $dry_run = false ): array {
$content = $raw_content;
}
// Clean any corruption from previous syncs (e.g. \u0026amp; artefacts)
$content = oribi_sync_clean_block_content( $content );
// Checksum based on raw source — used as fallback for providers without tree SHA
$checksum = hash( 'sha256', $raw_content );
@@ -238,11 +341,11 @@ function oribi_sync_run( bool $dry_run = false ): array {
}
}
$update_result = wp_update_post( [
$update_result = oribi_sync_save_post( [
'ID' => $existing->ID,
'post_content' => $content,
'post_status' => 'publish',
], true );
] );
if ( is_wp_error( $update_result ) ) {
$result['errors'][] = $slug . ': ' . $update_result->get_error_message();
@@ -261,13 +364,13 @@ function oribi_sync_run( bool $dry_run = false ): array {
// Create new page
$title = oribi_sync_slug_to_title( $slug );
$post_id = wp_insert_post( [
$post_id = oribi_sync_save_post( [
'post_title' => $title,
'post_name' => $slug,
'post_status' => 'publish',
'post_type' => 'page',
'post_content' => $content,
], true );
] );
if ( is_wp_error( $post_id ) ) {
$result['errors'][] = $slug . ': ' . $post_id->get_error_message();
@@ -624,14 +727,16 @@ function oribi_sync_pull_page_from_repo( int $post_id ): array {
}
if ( $content !== null ) {
// Clean any corruption from previous syncs
$content = oribi_sync_clean_block_content( $content );
$checksum = hash( 'sha256', $raw_content );
$git_sha = $target_entry['sha'] ?? '';
$update = wp_update_post( [
$update = oribi_sync_save_post( [
'ID' => $post->ID,
'post_content' => $content,
'post_status' => 'publish',
], true );
] );
if ( is_wp_error( $update ) ) {
$result['errors'][] = $slug . ': ' . $update->get_error_message();