diff --git a/dist/oribi-tech-sync.zip b/dist/oribi-tech-sync.zip index e53d98b..46d9aa2 100644 Binary files a/dist/oribi-tech-sync.zip and b/dist/oribi-tech-sync.zip differ diff --git a/includes/admin.php b/includes/admin.php index cfcd30c..374c247 100644 --- a/includes/admin.php +++ b/includes/admin.php @@ -257,6 +257,18 @@ add_action( 'admin_post_oribi_sync_pull', function () { exit; } ); +add_action( 'admin_post_oribi_sync_force_pull', function () { + if ( ! current_user_can( 'manage_options' ) ) wp_die( 'Permission denied.' ); + check_admin_referer( 'oribi_sync_force_pull' ); + + $result = oribi_sync_run( false, true ); + + set_transient( 'oribi_sync_result', $result, 60 ); + + wp_redirect( add_query_arg( 'oribi_sync_done', 'force_pull', admin_url( 'options-general.php?page=oribi-sync' ) ) ); + exit; +} ); + add_action( 'admin_post_oribi_sync_clear_pat', function () { if ( ! current_user_can( 'manage_options' ) ) wp_die( 'Permission denied.' ); check_admin_referer( 'oribi_sync_clear_pat' ); @@ -330,9 +342,10 @@ function oribi_sync_settings_page() {

@@ -469,6 +482,11 @@ function oribi_sync_settings_page() { onclick="return confirm('Pull content from the repo (no push). Continue?');"> Pull Only + + Force Pull + diff --git a/includes/post-sync.php b/includes/post-sync.php index 3ae727b..ed0043e 100644 --- a/includes/post-sync.php +++ b/includes/post-sync.php @@ -733,14 +733,14 @@ function oribi_sync_run_posts( } $post_arr['ID'] = $existing->ID; - $post_id = wp_update_post( $post_arr, true ); + $post_id = oribi_sync_save_post( $post_arr ); if ( is_wp_error( $post_id ) ) { $result['errors'][] = $slug . ': ' . $post_id->get_error_message(); continue; } $result['updated'][] = $slug; } else { - $post_id = wp_insert_post( $post_arr, true ); + $post_id = oribi_sync_save_post( $post_arr ); if ( is_wp_error( $post_id ) ) { $result['errors'][] = $slug . ': ' . $post_id->get_error_message(); continue; @@ -788,7 +788,7 @@ function oribi_sync_run_posts( $entry['path'] ); if ( $rewritten !== $html_content ) { - wp_update_post( [ 'ID' => $post_id, 'post_content' => $rewritten ] ); + oribi_sync_save_post( [ 'ID' => $post_id, 'post_content' => $rewritten ] ); } // ── Featured image ────────────────────────────────────────────────── @@ -852,10 +852,13 @@ function oribi_sync_trash_removed_posts( array $current_slugs ): array { * The post_content (HTML) is stored as the body — raw HTML is valid in * most Markdown flavours and renders correctly when re-imported. * - * @param WP_Post $post + * @param WP_Post $post + * @param int|null $post_id When provided, post_content is read raw from the DB. * @return string Markdown source. */ -function oribi_sync_generate_post_markdown( WP_Post $post ): string { +function oribi_sync_generate_post_markdown( WP_Post $post, ?int $post_id = null ): string { + global $wpdb; + $fm = "---\n"; // Title — decode HTML entities (WP stores & etc. in DB) so the YAML @@ -909,7 +912,14 @@ function oribi_sync_generate_post_markdown( WP_Post $post ): string { $fm .= "---\n\n"; - return $fm . $post->post_content; + // Read post_content directly from the DB when a post_id is supplied so + // we get exactly what oribi_sync_save_post() wrote, with no filter applied. + $id = $post_id ?? $post->ID; + $body = (string) $wpdb->get_var( + $wpdb->prepare( 'SELECT post_content FROM ' . $wpdb->posts . ' WHERE ID = %d', $id ) + ); + + return $fm . $body; } // ─── Push post to repo ──────────────────────────────────────────────────────── @@ -976,7 +986,7 @@ function oribi_sync_push_post( int $post_id, array $opts = [] ): array { $repo_path = rtrim( $posts_folder, '/' ) . '/' . $post->post_name . '.md'; } - $markdown_content = oribi_sync_generate_post_markdown( $post ); + $markdown_content = oribi_sync_generate_post_markdown( $post, $post_id ); $commit_msg = $opts['message'] ?? 'Sync: update post ' . $post->post_name . ' from WordPress'; $new_checksum = hash( 'sha256', $markdown_content ); diff --git a/includes/push-client.php b/includes/push-client.php index 6b65d8f..3bbf7d9 100644 --- a/includes/push-client.php +++ b/includes/push-client.php @@ -386,9 +386,18 @@ function oribi_sync_push_page( int $post_id, array $opts = [] ): array { } // ── Generate content ────────────────────────────────────────────────── - $slug = $post->post_name; - $title = $post->post_title; - $wp_content = $post->post_content; + $slug = $post->post_name; + $title = $post->post_title; + + // Read post_content directly from the DB — bypassing every get_post() + // filter — so we get exactly what oribi_sync_save_post() wrote. + global $wpdb; + $wp_content = (string) $wpdb->get_var( + $wpdb->prepare( 'SELECT post_content FROM ' . $wpdb->posts . ' WHERE ID = %d', $post_id ) + ); + + // Clean any corruption baked in by previous syncs (e.g. \u0026amp; artefacts) + $wp_content = oribi_sync_clean_block_content( $wp_content ); $commit_msg = $opts['message'] ?? "Sync: update {$slug} from WordPress"; diff --git a/includes/sync-engine.php b/includes/sync-engine.php index 7ab39d4..d8670af 100644 --- a/includes/sync-engine.php +++ b/includes/sync-engine.php @@ -11,6 +11,93 @@ if ( ! defined( 'ABSPATH' ) ) exit; // ─── Helpers ────────────────────────────────────────────────────────────────── +/** + * Insert or update a post while writing post_content DIRECTLY to the DB. + * + * Every code-path in wp_insert_post / wp_update_post runs the content through + * sanitize_post_field() → apply_filters('pre_post_content') and + * apply_filters('content_save_pre'), both of which have wp_kses_post + * callbacks that turn & (inside Gutenberg block JSON) into &. + * kses_remove_filters() only unhooks content_save_pre, NOT pre_post_content, + * so the ampersand corruption survived even with those wrappers. + * + * This helper lets WP create/update every other field normally (title, slug, + * status, dates, author …) with an empty content placeholder, then immediately + * overwrites post_content in the DB directly — no filters, no escaping beyond + * the $wpdb placeholder. + * + * @param array $post_arr Same shape as wp_insert_post / wp_update_post. + * @return int|WP_Error Post ID on success, WP_Error on failure. + */ +function oribi_sync_save_post( array $post_arr ) { + global $wpdb; + + $content = $post_arr['post_content'] ?? ''; + $post_arr['post_content'] = ''; // let WP handle everything else + + if ( ! empty( $post_arr['ID'] ) ) { + $post_id = wp_update_post( $post_arr, true ); + } else { + $post_id = wp_insert_post( $post_arr, true ); + } + + if ( is_wp_error( $post_id ) ) { + return $post_id; + } + + $wpdb->update( + $wpdb->posts, + [ 'post_content' => $content ], + [ 'ID' => (int) $post_id ], + [ '%s' ], + [ '%d' ] + ); + + clean_post_cache( (int) $post_id ); + + return $post_id; +} + +/** + * Clean previously-corrupted Gutenberg block content. + * + * Old syncs ran content through wp_kses_post which HTML-entity-encoded `&` + * inside JSON attributes to `&`. php's json_encode then re-encoded that + * `&` to `\u0026`, producing `\u0026amp;` instead of just `\u0026`. + * + * This function corrects those artefacts so block JSON attributes contain + * the right unicode escape sequences. + * + * Also normalises plain `&` → `&` inside JSON block comments so the + * next round of json_encode produces a single clean `\u0026`. + * + * @param string $content Gutenberg block HTML. + * @return string Cleaned block HTML. + */ +function oribi_sync_clean_block_content( string $content ): string { + // json_encode always hex-escapes & as \u0026 (even with JSON_UNESCAPED_UNICODE, + // which only affects codepoints > U+007F). Previous syncs also ran content + // through wp_kses_post which turned & into &, so json_encode then produced + // \u0026amp; instead of just \u0026. + // + // Fix the double-encoded forms first, then unescape the remaining \u0026 back + // to literal & — Gutenberg's block JSON parser treats both identically. + // These sequences are unambiguous in Gutenberg block comment JSON. + $content = str_replace( '\u0026amp;', '&', $content ); + $content = str_replace( '\u0026lt;', '<', $content ); + $content = str_replace( '\u0026gt;', '>', $content ); + $content = str_replace( '\u0026quot;', '"', $content ); + $content = str_replace( '\u0026#039;', "'", $content ); + // Clean any remaining plain hex-escapes of ASCII punctuation + $content = str_replace( '\u0026', '&', $content ); + $content = str_replace( '\u003C', '<', $content ); + $content = str_replace( '\u003E', '>', $content ); + $content = str_replace( '\u0022', '"', $content ); + $content = str_replace( '\u0027', "'", $content ); + + return $content; +} + /** * Strip a case-insensitive directory prefix from a file path. * @@ -29,14 +116,26 @@ function oribi_sync_strip_prefix( string $path, string $prefix ): string { /** Generate a self-closing block comment (standalone or child blocks). */ if ( ! function_exists( 'oribi_b' ) ) { function oribi_b( $name, $attrs = [] ) { - return ''; + $json = wp_json_encode( $attrs, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES ); + // json_encode always hex-escapes & < > ' for XSS safety, but these are + // inside HTML comments so they are safe as literals in Gutenberg block JSON. + $json = str_replace( [ '\u0026', '\u003C', '\u003E', '\u0022', '\u0027' ], + [ '&', '<', '>', '"', "'" ], $json ); + return ''; } } /** Generate an opening tag for a parent block comment. */ if ( ! function_exists( 'oribi_b_open' ) ) { function oribi_b_open( $name, $attrs = [] ) { - $json = ! empty( $attrs ) ? ' ' . wp_json_encode( $attrs, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES ) : ''; + if ( ! empty( $attrs ) ) { + $json = wp_json_encode( $attrs, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES ); + $json = str_replace( [ '\u0026', '\u003C', '\u003E', '\u0022', '\u0027' ], + [ '&', '<', '>', '"', "'" ], $json ); + $json = ' ' . $json; + } else { + $json = ''; + } return ''; } } @@ -100,10 +199,11 @@ function oribi_sync_execute_php( string $php_source, string $slug ) { * Run the full page sync. * * @param bool $dry_run If true, returns what would happen without making changes. + * @param bool $force If true, bypasses SHA-based change detection and re-pulls all files. * * @return array{ok: bool, created: string[], updated: string[], trashed: string[], skipped: string[], errors: string[]} */ -function oribi_sync_run( bool $dry_run = false ): array { +function oribi_sync_run( bool $dry_run = false, bool $force = false ): array { $result = [ 'ok' => true, 'created' => [], @@ -177,7 +277,7 @@ function oribi_sync_run( bool $dry_run = false ): array { $git_sha = $entry['sha'] ?? ''; $stored_git_sha = $existing ? get_post_meta( $existing->ID, '_oribi_sync_git_sha', true ) : ''; - if ( $existing && ! empty( $git_sha ) && $git_sha === $stored_git_sha ) { + if ( ! $force && $existing && ! empty( $git_sha ) && $git_sha === $stored_git_sha ) { $result['skipped'][] = $slug . ' (unchanged)'; if ( ! $dry_run ) { update_post_meta( $existing->ID, '_oribi_sync_last_run', current_time( 'mysql' ) ); @@ -209,6 +309,9 @@ function oribi_sync_run( bool $dry_run = false ): array { $content = $raw_content; } + // Clean any corruption from previous syncs (e.g. \u0026amp; artefacts) + $content = oribi_sync_clean_block_content( $content ); + // Checksum based on raw source — used as fallback for providers without tree SHA $checksum = hash( 'sha256', $raw_content ); @@ -238,11 +341,11 @@ function oribi_sync_run( bool $dry_run = false ): array { } } - $update_result = wp_update_post( [ + $update_result = oribi_sync_save_post( [ 'ID' => $existing->ID, 'post_content' => $content, 'post_status' => 'publish', - ], true ); + ] ); if ( is_wp_error( $update_result ) ) { $result['errors'][] = $slug . ': ' . $update_result->get_error_message(); @@ -261,13 +364,13 @@ function oribi_sync_run( bool $dry_run = false ): array { // Create new page $title = oribi_sync_slug_to_title( $slug ); - $post_id = wp_insert_post( [ + $post_id = oribi_sync_save_post( [ 'post_title' => $title, 'post_name' => $slug, 'post_status' => 'publish', 'post_type' => 'page', 'post_content' => $content, - ], true ); + ] ); if ( is_wp_error( $post_id ) ) { $result['errors'][] = $slug . ': ' . $post_id->get_error_message(); @@ -624,14 +727,16 @@ function oribi_sync_pull_page_from_repo( int $post_id ): array { } if ( $content !== null ) { + // Clean any corruption from previous syncs + $content = oribi_sync_clean_block_content( $content ); $checksum = hash( 'sha256', $raw_content ); $git_sha = $target_entry['sha'] ?? ''; - $update = wp_update_post( [ + $update = oribi_sync_save_post( [ 'ID' => $post->ID, 'post_content' => $content, 'post_status' => 'publish', - ], true ); + ] ); if ( is_wp_error( $update ) ) { $result['errors'][] = $slug . ': ' . $update->get_error_message();