Add force pull functionality and improve post content handling

This commit is contained in:
Matt Batchelder
2026-02-23 19:35:33 -05:00
parent 3b51382797
commit cdf176e224
5 changed files with 165 additions and 23 deletions

Binary file not shown.

View File

@@ -257,6 +257,18 @@ add_action( 'admin_post_oribi_sync_pull', function () {
exit;
} );
add_action( 'admin_post_oribi_sync_force_pull', function () {
    // Gate the action: capability check first, then the nonce tied to this action name.
    if ( ! current_user_can( 'manage_options' ) ) {
        wp_die( 'Permission denied.' );
    }
    check_admin_referer( 'oribi_sync_force_pull' );

    // Real run (dry_run = false) with force = true; the outcome is parked in a
    // 60-second transient under the 'oribi_sync_result' key.
    set_transient( 'oribi_sync_result', oribi_sync_run( false, true ), 60 );

    // Return to the plugin settings screen, flagging which action just finished.
    $settings_url = admin_url( 'options-general.php?page=oribi-sync' );
    wp_redirect( add_query_arg( 'oribi_sync_done', 'force_pull', $settings_url ) );
    exit;
} );
add_action( 'admin_post_oribi_sync_clear_pat', function () {
if ( ! current_user_can( 'manage_options' ) ) wp_die( 'Permission denied.' );
check_admin_referer( 'oribi_sync_clear_pat' );
@@ -330,9 +342,10 @@ function oribi_sync_settings_page() {
<?php if ( $sync_result ): ?>
<div class="notice <?php echo $sync_result['ok'] ? 'notice-success' : 'notice-error'; ?> is-dismissible">
<p><strong><?php
if ( $done === 'dry' ) echo 'Dry-run results';
elseif ( $done === 'pull' ) echo 'Pull complete';
else echo 'Sync complete';
if ( $done === 'dry' ) echo 'Dry-run results';
elseif ( $done === 'pull' ) echo 'Pull complete';
elseif ( $done === 'force_pull' ) echo 'Force Pull complete';
else echo 'Sync complete';
?></strong></p>
<?php oribi_sync_render_result_list( $sync_result ); ?>
</div>
@@ -469,6 +482,11 @@ function oribi_sync_settings_page() {
onclick="return confirm('Pull content from the repo (no push). Continue?');">
Pull Only
</a>
<a href="<?php echo esc_url( wp_nonce_url( admin_url( 'admin-post.php?action=oribi_sync_force_pull' ), 'oribi_sync_force_pull' ) ); ?>"
class="button"
onclick="return confirm('Force re-pull ALL content from the repo, bypassing change detection. Continue?');">
Force Pull
</a>
<a href="<?php echo esc_url( wp_nonce_url( admin_url( 'admin-post.php?action=oribi_sync_push_all' ), 'oribi_sync_push_all' ) ); ?>"
class="button"
onclick="return confirm('Push all synced pages to the repo (no pull). Continue?');">

View File

@@ -733,14 +733,14 @@ function oribi_sync_run_posts(
}
$post_arr['ID'] = $existing->ID;
$post_id = wp_update_post( $post_arr, true );
$post_id = oribi_sync_save_post( $post_arr );
if ( is_wp_error( $post_id ) ) {
$result['errors'][] = $slug . ': ' . $post_id->get_error_message();
continue;
}
$result['updated'][] = $slug;
} else {
$post_id = wp_insert_post( $post_arr, true );
$post_id = oribi_sync_save_post( $post_arr );
if ( is_wp_error( $post_id ) ) {
$result['errors'][] = $slug . ': ' . $post_id->get_error_message();
continue;
@@ -788,7 +788,7 @@ function oribi_sync_run_posts(
$entry['path']
);
if ( $rewritten !== $html_content ) {
wp_update_post( [ 'ID' => $post_id, 'post_content' => $rewritten ] );
oribi_sync_save_post( [ 'ID' => $post_id, 'post_content' => $rewritten ] );
}
// ── Featured image ──────────────────────────────────────────────────
@@ -852,10 +852,13 @@ function oribi_sync_trash_removed_posts( array $current_slugs ): array {
* The post_content (HTML) is stored as the body — raw HTML is valid in
* most Markdown flavours and renders correctly when re-imported.
*
* @param WP_Post $post
* @param WP_Post $post
* @param int|null $post_id When provided, post_content is read raw from the DB.
* @return string Markdown source.
*/
function oribi_sync_generate_post_markdown( WP_Post $post ): string {
function oribi_sync_generate_post_markdown( WP_Post $post, ?int $post_id = null ): string {
global $wpdb;
$fm = "---\n";
// Title — decode HTML entities (WP stores &amp; etc. in DB) so the YAML
@@ -909,7 +912,14 @@ function oribi_sync_generate_post_markdown( WP_Post $post ): string {
$fm .= "---\n\n";
return $fm . $post->post_content;
// Read post_content directly from the DB when a post_id is supplied so
// we get exactly what oribi_sync_save_post() wrote, with no filter applied.
$id = $post_id ?? $post->ID;
$body = (string) $wpdb->get_var(
$wpdb->prepare( 'SELECT post_content FROM ' . $wpdb->posts . ' WHERE ID = %d', $id )
);
return $fm . $body;
}
// ─── Push post to repo ────────────────────────────────────────────────────────
@@ -976,7 +986,7 @@ function oribi_sync_push_post( int $post_id, array $opts = [] ): array {
$repo_path = rtrim( $posts_folder, '/' ) . '/' . $post->post_name . '.md';
}
$markdown_content = oribi_sync_generate_post_markdown( $post );
$markdown_content = oribi_sync_generate_post_markdown( $post, $post_id );
$commit_msg = $opts['message'] ?? 'Sync: update post ' . $post->post_name . ' from WordPress';
$new_checksum = hash( 'sha256', $markdown_content );

View File

@@ -386,9 +386,18 @@ function oribi_sync_push_page( int $post_id, array $opts = [] ): array {
}
// ── Generate content ──────────────────────────────────────────────────
$slug = $post->post_name;
$title = $post->post_title;
$wp_content = $post->post_content;
$slug = $post->post_name;
$title = $post->post_title;
// Read post_content directly from the DB — bypassing every get_post()
// filter — so we get exactly what oribi_sync_save_post() wrote.
global $wpdb;
$wp_content = (string) $wpdb->get_var(
$wpdb->prepare( 'SELECT post_content FROM ' . $wpdb->posts . ' WHERE ID = %d', $post_id )
);
// Clean any corruption baked in by previous syncs (e.g. \u0026amp; artefacts)
$wp_content = oribi_sync_clean_block_content( $wp_content );
$commit_msg = $opts['message'] ?? "Sync: update {$slug} from WordPress";

View File

@@ -11,6 +11,93 @@ if ( ! defined( 'ABSPATH' ) ) exit;
// ─── Helpers ──────────────────────────────────────────────────────────────────
/**
 * Insert or update a post while writing post_content DIRECTLY to the DB.
 *
 * Rationale (as recorded by the original author): every code-path in
 * wp_insert_post / wp_update_post runs the content through
 * sanitize_post_field() → apply_filters('pre_post_content') and
 * apply_filters('content_save_pre'), both of which have wp_kses_post
 * callbacks that turn & (inside Gutenberg block JSON) into &amp;.
 * kses_remove_filters() only unhooks content_save_pre, NOT pre_post_content,
 * so the ampersand corruption survived even with those wrappers.
 *
 * This helper lets WP create/update every other field normally (title, slug,
 * status, dates, author …) with an empty content placeholder, then immediately
 * overwrites post_content in the DB directly — no filters, no escaping beyond
 * the $wpdb placeholder.
 *
 * Two safeguards:
 *  - An UPDATE whose array carries no 'post_content' key is passed straight to
 *    wp_update_post() so the stored content is left untouched instead of being
 *    blanked by the placeholder.
 *  - A failed direct write is reported as a WP_Error rather than silently
 *    returning the post ID of a post whose content is now empty.
 *
 * @param array $post_arr Same shape as wp_insert_post / wp_update_post.
 * @return int|WP_Error Post ID on success, WP_Error on failure.
 */
function oribi_sync_save_post( array $post_arr ) {
    global $wpdb;

    $is_update = ! empty( $post_arr['ID'] );

    // Nothing to write directly: do not replace existing content with ''.
    if ( $is_update && ! array_key_exists( 'post_content', $post_arr ) ) {
        return wp_update_post( $post_arr, true );
    }

    $content                  = $post_arr['post_content'] ?? '';
    $post_arr['post_content'] = ''; // let WP handle everything else

    $post_id = $is_update
        ? wp_update_post( $post_arr, true )
        : wp_insert_post( $post_arr, true );

    if ( is_wp_error( $post_id ) ) {
        return $post_id;
    }
    $post_id = (int) $post_id;

    // Raw write of the real content; $wpdb->update() returns false on a DB error.
    $written = $wpdb->update(
        $wpdb->posts,
        [ 'post_content' => $content ],
        [ 'ID' => $post_id ],
        [ '%s' ],
        [ '%d' ]
    );

    if ( false === $written ) {
        return new WP_Error(
            'oribi_sync_content_write_failed',
            'Could not write post_content for post ' . $post_id . ': ' . $wpdb->last_error
        );
    }

    // The row changed behind WP's back, so drop any cached copy of the post.
    clean_post_cache( $post_id );

    return $post_id;
}
/**
 * Clean previously-corrupted Gutenberg block content.
 *
 * Old syncs ran content through wp_kses_post, which HTML-entity-encoded
 * characters inside JSON attributes (e.g. `&` → `&amp;`). Once the leading
 * `&` of such an entity was itself hex-escaped, the stored text ended up as
 * `\u0026amp;`, `\u0026lt;`, `\u0026gt;`, `\u0026quot;` or `\u0026#039;`.
 *
 * Replacements run in this order over the whole string:
 *  1. The double-encoded forms above are collapsed to the character they
 *     stand for.
 *  2. Remaining `\u0026`, `\u003C`, `\u003E` and `\u0027` escapes are turned
 *     into literal `&`, `<`, `>` and `'`.
 *
 * Double quotes are the exception: a bare `"` inside a JSON string value ends
 * the string and makes the block attributes unparseable. `\u0026quot;` is
 * therefore normalised to the escape `\u0022`, and existing `\u0022`
 * sequences are left exactly as they are.
 *
 * Matching is case-sensitive; lowercase escapes such as `\u003c` are not
 * touched.
 *
 * @param string $content Gutenberg block HTML.
 * @return string Cleaned block HTML.
 */
function oribi_sync_clean_block_content( string $content ): string {
    // Order matters: the longer double-encoded sequences must be handled
    // before the bare \u0026 rule, which would otherwise eat their prefix.
    $search = [
        '\u0026amp;',
        '\u0026lt;',
        '\u0026gt;',
        '\u0026quot;',
        '\u0026#039;',
        '\u0026',
        '\u003C',
        '\u003E',
        '\u0027',
    ];
    $replace = [
        '&',
        '<',
        '>',
        '\u0022', // keep the quote escaped so the surrounding JSON stays valid
        "'",
        '&',
        '<',
        '>',
        "'",
    ];

    // With array arguments str_replace() applies each pair in sequence.
    return str_replace( $search, $replace, $content );
}
/**
* Strip a case-insensitive directory prefix from a file path.
*
@@ -29,14 +116,26 @@ function oribi_sync_strip_prefix( string $path, string $prefix ): string {
/** Generate a self-closing block comment (standalone or child blocks). */
if ( ! function_exists( 'oribi_b' ) ) {
function oribi_b( $name, $attrs = [] ) {
return '<!-- wp:oribi/' . $name . ' ' . wp_json_encode( $attrs, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES ) . ' /-->';
$json = wp_json_encode( $attrs, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES );
// json_encode always hex-escapes & < > ' for XSS safety, but these are
// inside HTML comments so they are safe as literals in Gutenberg block JSON.
$json = str_replace( [ '\u0026', '\u003C', '\u003E', '\u0022', '\u0027' ],
[ '&', '<', '>', '"', "'" ], $json );
return '<!-- wp:oribi/' . $name . ' ' . $json . ' /-->';
}
}
/** Generate an opening tag for a parent block comment. */
if ( ! function_exists( 'oribi_b_open' ) ) {
function oribi_b_open( $name, $attrs = [] ) {
$json = ! empty( $attrs ) ? ' ' . wp_json_encode( $attrs, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES ) : '';
if ( ! empty( $attrs ) ) {
$json = wp_json_encode( $attrs, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES );
$json = str_replace( [ '\u0026', '\u003C', '\u003E', '\u0022', '\u0027' ],
[ '&', '<', '>', '"', "'" ], $json );
$json = ' ' . $json;
} else {
$json = '';
}
return '<!-- wp:oribi/' . $name . $json . ' -->';
}
}
@@ -100,10 +199,11 @@ function oribi_sync_execute_php( string $php_source, string $slug ) {
* Run the full page sync.
*
* @param bool $dry_run If true, returns what would happen without making changes.
* @param bool $force If true, bypasses SHA-based change detection and re-pulls all files.
*
* @return array{ok: bool, created: string[], updated: string[], trashed: string[], skipped: string[], errors: string[]}
*/
function oribi_sync_run( bool $dry_run = false ): array {
function oribi_sync_run( bool $dry_run = false, bool $force = false ): array {
$result = [
'ok' => true,
'created' => [],
@@ -177,7 +277,7 @@ function oribi_sync_run( bool $dry_run = false ): array {
$git_sha = $entry['sha'] ?? '';
$stored_git_sha = $existing ? get_post_meta( $existing->ID, '_oribi_sync_git_sha', true ) : '';
if ( $existing && ! empty( $git_sha ) && $git_sha === $stored_git_sha ) {
if ( ! $force && $existing && ! empty( $git_sha ) && $git_sha === $stored_git_sha ) {
$result['skipped'][] = $slug . ' (unchanged)';
if ( ! $dry_run ) {
update_post_meta( $existing->ID, '_oribi_sync_last_run', current_time( 'mysql' ) );
@@ -209,6 +309,9 @@ function oribi_sync_run( bool $dry_run = false ): array {
$content = $raw_content;
}
// Clean any corruption from previous syncs (e.g. \u0026amp; artefacts)
$content = oribi_sync_clean_block_content( $content );
// Checksum based on raw source — used as fallback for providers without tree SHA
$checksum = hash( 'sha256', $raw_content );
@@ -238,11 +341,11 @@ function oribi_sync_run( bool $dry_run = false ): array {
}
}
$update_result = wp_update_post( [
$update_result = oribi_sync_save_post( [
'ID' => $existing->ID,
'post_content' => $content,
'post_status' => 'publish',
], true );
] );
if ( is_wp_error( $update_result ) ) {
$result['errors'][] = $slug . ': ' . $update_result->get_error_message();
@@ -261,13 +364,13 @@ function oribi_sync_run( bool $dry_run = false ): array {
// Create new page
$title = oribi_sync_slug_to_title( $slug );
$post_id = wp_insert_post( [
$post_id = oribi_sync_save_post( [
'post_title' => $title,
'post_name' => $slug,
'post_status' => 'publish',
'post_type' => 'page',
'post_content' => $content,
], true );
] );
if ( is_wp_error( $post_id ) ) {
$result['errors'][] = $slug . ': ' . $post_id->get_error_message();
@@ -624,14 +727,16 @@ function oribi_sync_pull_page_from_repo( int $post_id ): array {
}
if ( $content !== null ) {
// Clean any corruption from previous syncs
$content = oribi_sync_clean_block_content( $content );
$checksum = hash( 'sha256', $raw_content );
$git_sha = $target_entry['sha'] ?? '';
$update = wp_update_post( [
$update = oribi_sync_save_post( [
'ID' => $post->ID,
'post_content' => $content,
'post_status' => 'publish',
], true );
] );
if ( is_wp_error( $update ) ) {
$result['errors'][] = $slug . ': ' . $update->get_error_message();