diff --git a/dist/oribi-tech-sync.zip b/dist/oribi-tech-sync.zip index d11c66a..e53d98b 100644 Binary files a/dist/oribi-tech-sync.zip and b/dist/oribi-tech-sync.zip differ diff --git a/includes/api-client.php b/includes/api-client.php index ff3c009..006ad15 100644 --- a/includes/api-client.php +++ b/includes/api-client.php @@ -380,13 +380,15 @@ function oribi_sync_fetch_file( string $api_base, string $branch, string $file_p $body = wp_remote_retrieve_body( $response ); - // For Gitea, the /contents/ endpoint returns base64-encoded content in JSON + // For Gitea, the /contents/ endpoint returns base64-encoded content in JSON. + // Gitea (like GitHub) inserts \n every 60 chars in the base64 — strip them before decoding. if ( $provider === 'gitea' ) { $decoded = json_decode( $body, true ); if ( isset( $decoded['content'] ) && is_string( $decoded['content'] ) ) { - $body = base64_decode( $decoded['content'], true ); + $clean = str_replace( [ "\r", "\n", " " ], '', $decoded['content'] ); + $body = base64_decode( $clean, true ); if ( $body === false ) { - return new WP_Error( 'oribi_sync_decode_error', 'Failed to decode base64 content from Gitea' ); + return new WP_Error( 'oribi_sync_decode_error', 'Failed to decode base64 content from Gitea.' ); } } } diff --git a/includes/post-sync.php b/includes/post-sync.php index 186b938..3ae727b 100644 --- a/includes/post-sync.php +++ b/includes/post-sync.php @@ -858,8 +858,9 @@ function oribi_sync_trash_removed_posts( array $current_slugs ): array { function oribi_sync_generate_post_markdown( WP_Post $post ): string { $fm = "---\n"; - // Title (escape newlines) - $fm .= 'title: ' . str_replace( [ "\r", "\n" ], ' ', $post->post_title ) . "\n"; + // Title — decode HTML entities (WP stores & etc. in DB) so the YAML + // file contains the literal character. On pull-back WP re-encodes correctly. + $fm .= 'title: ' . str_replace( [ "\r", "\n" ], ' ', html_entity_decode( $post->post_title, ENT_QUOTES | ENT_HTML5, 'UTF-8' ) ) . "\n"; $fm .= 'slug: ' . $post->post_name . "\n"; $fm .= 'status: ' . $post->post_status . "\n"; @@ -874,27 +875,27 @@ function oribi_sync_generate_post_markdown( WP_Post $post ): string { $fm .= 'author: ' . $author->user_login . "\n"; } - // Categories + // Categories — decode HTML entities stored by WP $cats = get_the_category( $post->ID ); if ( ! empty( $cats ) ) { $fm .= "categories:\n"; foreach ( $cats as $cat ) { - $fm .= ' - ' . $cat->name . "\n"; + $fm .= ' - ' . html_entity_decode( $cat->name, ENT_QUOTES | ENT_HTML5, 'UTF-8' ) . "\n"; } } - // Tags + // Tags — decode HTML entities stored by WP $post_tags = get_the_tags( $post->ID ); if ( ! empty( $post_tags ) ) { $fm .= "tags:\n"; foreach ( $post_tags as $tag ) { - $fm .= ' - ' . $tag->name . "\n"; + $fm .= ' - ' . html_entity_decode( $tag->name, ENT_QUOTES | ENT_HTML5, 'UTF-8' ) . "\n"; } } - // Excerpt + // Excerpt — decode HTML entities stored by WP if ( ! empty( $post->post_excerpt ) ) { - $fm .= 'excerpt: ' . str_replace( [ "\r", "\n" ], ' ', $post->post_excerpt ) . "\n"; + $fm .= 'excerpt: ' . str_replace( [ "\r", "\n" ], ' ', html_entity_decode( $post->post_excerpt, ENT_QUOTES | ENT_HTML5, 'UTF-8' ) ) . "\n"; } // Featured image (absolute URL so it round-trips cleanly) diff --git a/includes/push-client.php b/includes/push-client.php index fc644a1..6b65d8f 100644 --- a/includes/push-client.php +++ b/includes/push-client.php @@ -104,7 +104,7 @@ function oribi_sync_api_request( string $method, string $url, array $body, strin 'method' => $method, 'timeout' => 30, 'headers' => $headers, - 'body' => wp_json_encode( $body, JSON_UNESCAPED_UNICODE ), + 'body' => wp_json_encode( $body, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES ), ]; $response = wp_remote_request( $url, $args ); @@ -152,9 +152,15 @@ function oribi_sync_gitea_get_file_meta( string $api_base, string $branch, strin return $result; } + // Gitea inserts \n every 60 chars in base64 — strip before decoding. + $raw_b64 = $result['content'] ?? ''; + $content = ! empty( $raw_b64 ) + ? base64_decode( str_replace( [ "\r", "\n", " " ], '', $raw_b64 ), true ) + : ''; + return [ 'sha' => $result['sha'] ?? '', - 'content' => isset( $result['content'] ) ? base64_decode( $result['content'] ) : '', + 'content' => ( $content !== false ) ? $content : '', ]; } @@ -182,6 +188,11 @@ function oribi_sync_gitea_put_file( ?string $sha = null, string $message = '' ) { + // Validate and fix UTF-8 encoding before base64-encoding + if ( ! mb_check_encoding( $content, 'UTF-8' ) ) { + $content = mb_convert_encoding( $content, 'UTF-8', 'UTF-8, ISO-8859-1, Windows-1252' ); + } + $encoded_path = implode( '/', array_map( 'rawurlencode', explode( '/', $filepath ) ) ); $url = $api_base . '/contents/' . $encoded_path;