Enhance Gitea support by improving base64 decoding and UTF-8 encoding handling

This commit is contained in:
Matt Batchelder
2026-02-21 20:08:18 -05:00
parent 634e93236f
commit 3b51382797
4 changed files with 27 additions and 13 deletions

Binary file not shown.

View File

@@ -380,13 +380,15 @@ function oribi_sync_fetch_file( string $api_base, string $branch, string $file_p
$body = wp_remote_retrieve_body( $response ); $body = wp_remote_retrieve_body( $response );
// For Gitea, the /contents/ endpoint returns base64-encoded content in JSON // For Gitea, the /contents/ endpoint returns base64-encoded content in JSON.
// Gitea (like GitHub) inserts \n every 60 chars in the base64 — strip them before decoding.
if ( $provider === 'gitea' ) { if ( $provider === 'gitea' ) {
$decoded = json_decode( $body, true ); $decoded = json_decode( $body, true );
if ( isset( $decoded['content'] ) && is_string( $decoded['content'] ) ) { if ( isset( $decoded['content'] ) && is_string( $decoded['content'] ) ) {
$body = base64_decode( $decoded['content'], true ); $clean = str_replace( [ "\r", "\n", " " ], '', $decoded['content'] );
$body = base64_decode( $clean, true );
if ( $body === false ) { if ( $body === false ) {
return new WP_Error( 'oribi_sync_decode_error', 'Failed to decode base64 content from Gitea' ); return new WP_Error( 'oribi_sync_decode_error', 'Failed to decode base64 content from Gitea.' );
} }
} }
} }

View File

@@ -858,8 +858,9 @@ function oribi_sync_trash_removed_posts( array $current_slugs ): array {
function oribi_sync_generate_post_markdown( WP_Post $post ): string { function oribi_sync_generate_post_markdown( WP_Post $post ): string {
$fm = "---\n"; $fm = "---\n";
// Title (escape newlines) // Title — decode HTML entities (WP stores &amp; etc. in DB) so the YAML
$fm .= 'title: ' . str_replace( [ "\r", "\n" ], ' ', $post->post_title ) . "\n"; // file contains the literal character. On pull-back WP re-encodes correctly.
$fm .= 'title: ' . str_replace( [ "\r", "\n" ], ' ', html_entity_decode( $post->post_title, ENT_QUOTES | ENT_HTML5, 'UTF-8' ) ) . "\n";
$fm .= 'slug: ' . $post->post_name . "\n"; $fm .= 'slug: ' . $post->post_name . "\n";
$fm .= 'status: ' . $post->post_status . "\n"; $fm .= 'status: ' . $post->post_status . "\n";
@@ -874,27 +875,27 @@ function oribi_sync_generate_post_markdown( WP_Post $post ): string {
$fm .= 'author: ' . $author->user_login . "\n"; $fm .= 'author: ' . $author->user_login . "\n";
} }
// Categories // Categories — decode HTML entities stored by WP
$cats = get_the_category( $post->ID ); $cats = get_the_category( $post->ID );
if ( ! empty( $cats ) ) { if ( ! empty( $cats ) ) {
$fm .= "categories:\n"; $fm .= "categories:\n";
foreach ( $cats as $cat ) { foreach ( $cats as $cat ) {
$fm .= ' - ' . $cat->name . "\n"; $fm .= ' - ' . html_entity_decode( $cat->name, ENT_QUOTES | ENT_HTML5, 'UTF-8' ) . "\n";
} }
} }
// Tags // Tags — decode HTML entities stored by WP
$post_tags = get_the_tags( $post->ID ); $post_tags = get_the_tags( $post->ID );
if ( ! empty( $post_tags ) ) { if ( ! empty( $post_tags ) ) {
$fm .= "tags:\n"; $fm .= "tags:\n";
foreach ( $post_tags as $tag ) { foreach ( $post_tags as $tag ) {
$fm .= ' - ' . $tag->name . "\n"; $fm .= ' - ' . html_entity_decode( $tag->name, ENT_QUOTES | ENT_HTML5, 'UTF-8' ) . "\n";
} }
} }
// Excerpt // Excerpt — decode HTML entities stored by WP
if ( ! empty( $post->post_excerpt ) ) { if ( ! empty( $post->post_excerpt ) ) {
$fm .= 'excerpt: ' . str_replace( [ "\r", "\n" ], ' ', $post->post_excerpt ) . "\n"; $fm .= 'excerpt: ' . str_replace( [ "\r", "\n" ], ' ', html_entity_decode( $post->post_excerpt, ENT_QUOTES | ENT_HTML5, 'UTF-8' ) ) . "\n";
} }
// Featured image (absolute URL so it round-trips cleanly) // Featured image (absolute URL so it round-trips cleanly)

View File

@@ -104,7 +104,7 @@ function oribi_sync_api_request( string $method, string $url, array $body, strin
'method' => $method, 'method' => $method,
'timeout' => 30, 'timeout' => 30,
'headers' => $headers, 'headers' => $headers,
'body' => wp_json_encode( $body, JSON_UNESCAPED_UNICODE ), 'body' => wp_json_encode( $body, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES ),
]; ];
$response = wp_remote_request( $url, $args ); $response = wp_remote_request( $url, $args );
@@ -152,9 +152,15 @@ function oribi_sync_gitea_get_file_meta( string $api_base, string $branch, strin
return $result; return $result;
} }
// Gitea inserts \n every 60 chars in base64 — strip before decoding.
$raw_b64 = $result['content'] ?? '';
$content = ! empty( $raw_b64 )
? base64_decode( str_replace( [ "\r", "\n", " " ], '', $raw_b64 ), true )
: '';
return [ return [
'sha' => $result['sha'] ?? '', 'sha' => $result['sha'] ?? '',
'content' => isset( $result['content'] ) ? base64_decode( $result['content'] ) : '', 'content' => ( $content !== false ) ? $content : '',
]; ];
} }
@@ -182,6 +188,11 @@ function oribi_sync_gitea_put_file(
?string $sha = null, ?string $sha = null,
string $message = '' string $message = ''
) { ) {
// Validate and fix UTF-8 encoding before base64-encoding
if ( ! mb_check_encoding( $content, 'UTF-8' ) ) {
$content = mb_convert_encoding( $content, 'UTF-8', 'UTF-8, ISO-8859-1, Windows-1252' );
}
$encoded_path = implode( '/', array_map( 'rawurlencode', explode( '/', $filepath ) ) ); $encoded_path = implode( '/', array_map( 'rawurlencode', explode( '/', $filepath ) ) );
$url = $api_base . '/contents/' . $encoded_path; $url = $api_base . '/contents/' . $encoded_path;