Files
WordpressGitSync/includes/post-sync.php
Matt Batchelder d56d46490a Add post synchronization functionality for Markdown files
- Implemented a parser for YAML front-matter in Markdown files.
- Developed functions to convert Markdown content to HTML.
- Created a pipeline to sync WordPress posts from a specified folder in a Git repository.
- Added media import capabilities to handle images referenced in Markdown.
- Implemented author resolution and post slug generation.
- Included error handling and logging for sync operations.
- Enabled trashing of posts that are no longer present in the repository.
2026-02-21 10:44:34 -05:00

1066 lines
42 KiB
PHP

<?php
/**
* Oribi Sync — Posts sync.
*
* Imports WordPress posts from Markdown files with YAML front-matter
* found in a configurable folder in the Git repository.
*
* Supported front-matter fields:
* title, slug, status, date, author, categories, tags, excerpt, featured_image
*
* Media referenced with relative paths or absolute URLs is automatically
* downloaded into the WP media library and URLs are rewritten.
*/
if ( ! defined( 'ABSPATH' ) ) exit;
// ─── Front-matter parser ──────────────────────────────────────────────────────
/**
 * Split a raw Markdown string into YAML front-matter and body.
 *
 * Only a small YAML subset is understood:
 *   - Scalar:        key: value      (surrounding quotes are stripped)
 *   - Boolean:       key: true / key: false
 *   - Inline array:  tags: [a, b, c] (`[]` yields an empty array)
 *   - Block list:
 *       categories:
 *         - Foo
 *         - Bar
 *     List items may be indented or flush with the key.
 *
 * Keys are lower-cased. Lines matching none of the forms above are ignored.
 * When the input does not start with `---`, or no closing `---` is found at
 * the start of a later line, the front-matter is empty and the body is the
 * untouched input.
 *
 * @param string $raw Raw file content (UTF-8).
 * @return array{front_matter: array, body: string}
 */
function oribi_sync_parse_front_matter( string $raw ): array {
    $empty = [ 'front_matter' => [], 'body' => $raw ];
    $raw   = ltrim( $raw );
    if ( strncmp( $raw, '---', 3 ) !== 0 ) {
        return $empty;
    }

    // Find the closing --- (must start at beginning of a line).
    $after_open = substr( $raw, 3 );
    $close_pos  = strpos( $after_open, "\n---" );
    if ( $close_pos === false ) {
        return $empty;
    }
    $yaml_part = substr( $after_open, 0, $close_pos );

    // Body begins after the closing --- (4 = strlen("\n---")) and any newlines.
    $body = ltrim( substr( $after_open, $close_pos + 4 ), "\r\n" );

    $strip_quotes = static function ( $value ) {
        return trim( $value, '"\' ' );
    };

    $fm               = [];
    $current_list_key = null;
    foreach ( explode( "\n", $yaml_part ) as $line ) {
        // Files with CRLF endings leave a "\r" on every line; without this the
        // carriage return would end up inside block-list values.
        $line = rtrim( $line, "\r" );

        // Block list item belonging to the most recent `key:` line. A line
        // starting with a dash can never be a `key: value` pair, so accepting
        // un-indented items is unambiguous.
        if ( $current_list_key !== null && preg_match( '/^\s*-\s+(.+)$/', $line, $m ) ) {
            $fm[ $current_list_key ][] = $strip_quotes( $m[1] );
            continue;
        }
        $current_list_key = null;

        // key: value
        if ( ! preg_match( '/^([\w][\w-]*):\s*(.*)$/', $line, $m ) ) {
            continue;
        }
        $key   = strtolower( $m[1] );
        $value = trim( $m[2] );

        if ( $value === 'true' ) {
            $fm[ $key ] = true;
        } elseif ( $value === 'false' ) {
            $fm[ $key ] = false;
        } elseif ( $value === '' ) {
            // Empty value: a block list may follow on the next lines.
            $fm[ $key ]       = [];
            $current_list_key = $key;
        } elseif ( $value[0] === '[' && substr( $value, -1 ) === ']' ) {
            // Inline array [a, b, c]; "[]" must not produce a single empty item.
            $inner      = trim( substr( $value, 1, -1 ) );
            $fm[ $key ] = $inner === ''
                ? []
                : array_map( $strip_quotes, explode( ',', $inner ) );
        } else {
            $fm[ $key ] = $strip_quotes( $value );
        }
    }

    return [ 'front_matter' => $fm, 'body' => $body ];
}
// ─── Markdown to HTML ─────────────────────────────────────────────────────────
/**
 * Process inline Markdown elements on a text fragment.
 *
 * Handles images, links, bold/italic emphasis, strikethrough and hard line
 * breaks. Raw HTML in the input is passed through untouched.
 *
 * Generated `<img>` tags and `<a>` opening tags are parked behind opaque
 * tokens while the emphasis passes run, so `_` and `*` characters inside
 * URLs or alt text are never turned into `<em>` / `<strong>` markup.
 *
 * @param string $text Markdown fragment.
 * @return string HTML
 */
function oribi_sync_md_inline( string $text ): string {
    $tags  = [];
    $stash = static function ( string $html ) use ( &$tags ): string {
        $tok          = "\x02ITAG" . count( $tags ) . "\x03";
        $tags[ $tok ] = $html;
        return $tok;
    };
    // A literal double quote would terminate the attribute it is placed in.
    $attr = static function ( string $value ): string {
        return str_replace( '"', '&quot;', $value );
    };

    // Images first (must come before links to avoid mismatching alt text).
    $replaced = preg_replace_callback(
        '/!\[([^\]]*)\]\(([^\s\)]+)(?:\s+"[^"]*")?\)/',
        static function ( array $m ) use ( $stash, $attr ): string {
            return $stash( '<img src="' . $attr( $m[2] ) . '" alt="' . $attr( $m[1] ) . '" />' );
        },
        $text
    );
    if ( $replaced !== null ) {
        $text = $replaced;
    }

    // Links [text](url "optional title"). Only the opening tag is protected so
    // emphasis inside the link text is still rendered.
    $replaced = preg_replace_callback(
        '/\[([^\]]+)\]\(([^\s\)]+)(?:\s+"[^"]*")?\)/',
        static function ( array $m ) use ( $stash, $attr ): string {
            return $stash( '<a href="' . $attr( $m[2] ) . '">' ) . $m[1] . '</a>';
        },
        $text
    );
    if ( $replaced !== null ) {
        $text = $replaced;
    }

    // Order matters: longest emphasis markers first.
    $rules = [
        // Bold + italic ***…*** or ___…___
        '/\*{3}(.+?)\*{3}/s'               => '<strong><em>$1</em></strong>',
        '/_{3}(.+?)_{3}/s'                 => '<strong><em>$1</em></strong>',
        // Bold **…** or __…__
        '/\*{2}(.+?)\*{2}/s'               => '<strong>$1</strong>',
        '/_{2}(.+?)_{2}/s'                 => '<strong>$1</strong>',
        // Italic *…* / _…_ (not inside a word)
        '/(?<!\w)\*([^*\n]+?)\*(?!\w)/'    => '<em>$1</em>',
        '/(?<!\w)_([^_\n]+?)_(?!\w)/'      => '<em>$1</em>',
        // Strikethrough ~~…~~
        '/~~(.+?)~~/s'                     => '<del>$1</del>',
        // Hard line break: two or more spaces before a newline
        '/ {2,}\n/'                        => "<br />\n",
    ];
    foreach ( $rules as $pattern => $replacement ) {
        $replaced = preg_replace( $pattern, $replacement, $text );
        // preg_replace() yields null on a PCRE error; keep the text as-is then.
        if ( $replaced !== null ) {
            $text = $replaced;
        }
    }

    // Restore newest-first so a tag that embeds an earlier token is expanded
    // before that inner token is looked up.
    if ( ! empty( $tags ) ) {
        $tags = array_reverse( $tags, true );
        $text = str_replace( array_keys( $tags ), array_values( $tags ), $text );
    }

    return $text;
}
/**
 * Convert a Markdown string to HTML.
 *
 * Handles: fenced code blocks, inline code, ATX headings, setext headings,
 * blockquotes, horizontal rules, unordered and ordered lists, paragraphs,
 * and (via oribi_sync_md_inline()) all inline formatting.
 *
 * Code is swapped for opaque tokens before any other processing so that its
 * content is never interpreted as Markdown; the tokens are expanded again at
 * the very end.
 *
 * @param string $md Markdown source.
 * @return string HTML output.
 */
function oribi_sync_markdown_to_html( string $md ): string {
    // Normalize line endings so every later pattern only deals with "\n".
    $md = str_replace( [ "\r\n", "\r" ], "\n", $md );

    // ── Protect fenced code blocks ──────────────────────────────────────────
    $fenced    = [];
    $protected = preg_replace_callback(
        '/^(`{3,}|~{3,})([\w-]*)\n(.*?)\n\1\h*$/ms',
        function ( $m ) use ( &$fenced ) {
            $tok            = "\x02FENCE" . count( $fenced ) . "\x03";
            $lang           = $m[2] !== '' ? ' class="language-' . htmlspecialchars( $m[2], ENT_QUOTES ) . '"' : '';
            $fenced[ $tok ] = '<pre><code' . $lang . '>' . htmlspecialchars( $m[3] ) . '</code></pre>';
            return $tok;
        },
        $md
    );
    if ( $protected !== null ) {
        $md = $protected;
    }

    // ── Protect inline code ─────────────────────────────────────────────────
    $icodes    = [];
    $protected = preg_replace_callback(
        '/``(.+?)``|`([^`\n]+)`/s',
        function ( $m ) use ( &$icodes ) {
            $tok = "\x02ICODE" . count( $icodes ) . "\x03";
            // Group 1 is the double-backtick form; it is empty when the
            // single-backtick alternative matched.
            $content        = $m[1] !== '' ? $m[1] : $m[2];
            $icodes[ $tok ] = '<code>' . htmlspecialchars( $content ) . '</code>';
            return $tok;
        },
        $md
    );
    if ( $protected !== null ) {
        $md = $protected;
    }

    $lines    = explode( "\n", $md );
    $out      = [];   // accumulated output blocks (strings)
    $para     = [];   // accumulated paragraph lines
    $in_list  = null; // 'ul' | 'ol' | null
    $list_buf = [];   // <li> items for current list

    $flush_para = function () use ( &$para, &$out ) {
        if ( empty( $para ) ) {
            return;
        }
        $out[] = '<p>' . oribi_sync_md_inline( implode( "\n", $para ) ) . '</p>';
        $para  = [];
    };
    $flush_list = function () use ( &$in_list, &$list_buf, &$out ) {
        if ( $in_list === null ) {
            return;
        }
        $tag      = $in_list;
        $items    = implode( '', $list_buf );
        $out[]    = "<{$tag}>{$items}</{$tag}>";
        $in_list  = null;
        $list_buf = [];
    };

    $n = count( $lines );
    for ( $i = 0; $i < $n; $i++ ) {
        $line = $lines[ $i ];

        // ── Fenced block placeholder ────────────────────────────────────────
        if ( isset( $fenced[ trim( $line ) ] ) ) {
            $flush_para();
            $flush_list();
            $out[] = $fenced[ trim( $line ) ];
            continue;
        }

        // ── Blank line ──────────────────────────────────────────────────────
        if ( trim( $line ) === '' ) {
            $flush_para();
            $flush_list();
            continue;
        }

        // ── ATX heading #…###### ───────────────────────────────────────────
        if ( preg_match( '/^(#{1,6})\s+(.+?)(?:\s+#+\s*)?$/', $line, $m ) ) {
            $flush_para();
            $flush_list();
            $lvl   = strlen( $m[1] );
            $out[] = "<h{$lvl}>" . oribi_sync_md_inline( trim( $m[2] ) ) . "</h{$lvl}>";
            continue;
        }

        // ── Setext heading underline (=== → h1, --- → h2) ───────────────────
        // An underline turns the paragraph collected so far into a heading,
        // which also covers the common single-line heading. This must run
        // before the horizontal-rule check: a dash line directly below
        // paragraph text is an underline, not a rule.
        if ( ! empty( $para ) && preg_match( '/^(=+|-{2,})\s*$/', $line, $m ) ) {
            $flush_list();
            $lvl   = $m[1][0] === '=' ? 1 : 2;
            $out[] = "<h{$lvl}>" . oribi_sync_md_inline( trim( implode( "\n", $para ) ) ) . "</h{$lvl}>";
            $para  = [];
            continue;
        }

        // ── Horizontal rule ─────────────────────────────────────────────────
        if ( preg_match( '/^(?:\*\s*){3,}$|^(?:-\s*){3,}$|^(?:_\s*){3,}$/', trim( $line ) ) ) {
            $flush_para();
            $flush_list();
            $out[] = '<hr />';
            continue;
        }

        // ── Blockquote ──────────────────────────────────────────────────────
        // Consecutive ">" lines belong to the same quote, so they are consumed
        // here and emitted as one block instead of one <blockquote> per line.
        if ( preg_match( '/^>\s?(.*)$/', $line, $m ) ) {
            $flush_para();
            $flush_list();
            $quote = [ $m[1] ];
            while ( $i + 1 < $n && preg_match( '/^>\s?(.*)$/', $lines[ $i + 1 ], $qm ) ) {
                $quote[] = $qm[1];
                $i++;
            }
            $out[] = '<blockquote><p>' . oribi_sync_md_inline( implode( "\n", $quote ) ) . '</p></blockquote>';
            continue;
        }

        // ── Unordered list item ─────────────────────────────────────────────
        if ( preg_match( '/^[-*+]\s+(.+)$/', $line, $m ) ) {
            $flush_para();
            if ( $in_list !== 'ul' ) {
                $flush_list();
                $in_list = 'ul';
            }
            $list_buf[] = '<li>' . oribi_sync_md_inline( $m[1] ) . '</li>';
            continue;
        }

        // ── Ordered list item ───────────────────────────────────────────────
        if ( preg_match( '/^\d+[.)]\s+(.+)$/', $line, $m ) ) {
            $flush_para();
            if ( $in_list !== 'ol' ) {
                $flush_list();
                $in_list = 'ol';
            }
            $list_buf[] = '<li>' . oribi_sync_md_inline( $m[1] ) . '</li>';
            continue;
        }

        // ── Regular text → paragraph ────────────────────────────────────────
        $flush_list();
        $para[] = $line;
    }
    $flush_para();
    $flush_list();

    $html = implode( "\n", $out );

    // Restore protected tokens (fenced tokens that sat mid-paragraph, and all
    // inline code).
    $html = strtr( $html, $fenced );
    $html = strtr( $html, $icodes );

    return trim( $html );
}
// ─── Author resolution ────────────────────────────────────────────────────────
/**
 * Map a front-matter `author` value onto a WordPress user ID.
 *
 * Lookup order: user_login, then user_email, then a display_name search.
 * When nothing matches (or the identifier is empty) the current user ID is
 * used, and if there is no current user the ID 1 is returned.
 *
 * @param string $identifier Value from front-matter `author` field.
 * @return int WP user ID.
 */
function oribi_sync_resolve_author( string $identifier ): int {
    if ( ! empty( $identifier ) ) {
        // Exact lookups first: login, then e-mail.
        foreach ( [ 'login', 'email' ] as $field ) {
            $match = get_user_by( $field, $identifier );
            if ( $match ) {
                return $match->ID;
            }
        }

        // Last resort: search restricted to the display_name column.
        $by_name = get_users( [ 'search' => $identifier, 'search_columns' => [ 'display_name' ], 'number' => 1 ] );
        if ( ! empty( $by_name ) ) {
            return $by_name[0]->ID;
        }
    }

    $current = get_current_user_id();
    if ( $current > 0 ) {
        return $current;
    }
    return 1;
}
// ─── Post slug helpers ────────────────────────────────────────────────────────
/**
 * Turn a post filename into a URL-safe slug.
 *
 * The extension is dropped first, then a leading YYYY-MM-DD- date prefix
 * (if any), and the remainder is passed through sanitize_title().
 * Examples:
 *   2026-02-21-my-great-post.md → my-great-post
 *   hello-world.markdown        → hello-world
 *
 * @param string $filename File name, with or without directory part.
 * @return string Slug as returned by sanitize_title().
 */
function oribi_sync_post_filename_to_slug( string $filename ): string {
    $stem = pathinfo( $filename, PATHINFO_FILENAME );

    return sanitize_title(
        // Strip a leading date prefix YYYY-MM-DD-
        preg_replace( '/^\d{4}-\d{2}-\d{2}-/', '', $stem )
    );
}
/**
 * Look up a previously synced `post` by its slug.
 *
 * Only posts carrying the `_oribi_sync_checksum` meta key and having one of
 * the statuses publish, draft, pending or private are considered.
 *
 * @param string $slug Post slug (post_name).
 * @return WP_Post|null First matching post, or null when there is none.
 */
function oribi_sync_get_synced_post_by_slug( string $slug ): ?WP_Post {
    $query_args = [
        'name'           => $slug,
        'post_type'      => 'post',
        'post_status'    => [ 'publish', 'draft', 'pending', 'private' ],
        'meta_key'       => '_oribi_sync_checksum',
        'posts_per_page' => 1,
    ];

    $matches = get_posts( $query_args );
    if ( empty( $matches ) ) {
        return null;
    }
    return $matches[0];
}
// ─── Media import helpers ─────────────────────────────────────────────────────
/**
 * Resolve a relative image path (from a post file) to a repo-relative path.
 *
 * The src is joined onto the directory of the Markdown file and `.` / `..`
 * segments are collapsed. A leading slash on the src is ignored, i.e. the
 * path is still taken relative to the post's directory.
 *
 * @param string $src            Image src (relative path or absolute URL).
 * @param string $post_file_path Repo path of the Markdown file (e.g. posts/2026-hello.md).
 * @return string Resolved repo-relative path, or empty string if $src is an http(s) URL.
 */
function oribi_sync_resolve_repo_image_path( string $src, string $post_file_path ): string {
    if ( preg_match( '#^https?://#i', $src ) ) {
        return ''; // Absolute URL — caller handles directly
    }

    // dirname() reports "." for a file at the repo root. Only that exact value
    // is treated as "no directory": trimming dots as characters would corrupt
    // directory names that begin or end with one (e.g. ".content").
    $post_dir = dirname( $post_file_path );
    if ( $post_dir === '.' ) {
        $post_dir = '';
    }
    $post_dir = trim( $post_dir, '/' . DIRECTORY_SEPARATOR );

    $relative = ltrim( $src, '/' );
    $combined = $post_dir !== '' ? $post_dir . '/' . $relative : $relative;

    // Normalize ./ and ../ ; ".." above the repo root is silently dropped.
    $normalized = [];
    foreach ( explode( '/', $combined ) as $part ) {
        if ( $part === '..' ) {
            array_pop( $normalized );
        } elseif ( $part !== '.' && $part !== '' ) {
            $normalized[] = $part;
        }
    }

    return implode( '/', $normalized );
}
/**
 * Sideload an image file into the WP media library.
 *
 * For absolute http(s) URLs, the file is downloaded with download_url().
 * For relative repo paths, the file is fetched via oribi_sync_fetch_file()
 * and written to a temporary file first.
 *
 * Deduplicates by tracking the original src in `_oribi_sync_origin_src` post meta.
 *
 * NOTE(review): the deduplication key is the raw src. Two posts in different
 * folders that both reference e.g. `images/cover.png` would therefore share
 * the first imported attachment — confirm whether the key should be the
 * resolved repo path instead.
 *
 * @param string $src            Original src attribute value.
 * @param int    $post_id        Parent post to attach to.
 * @param string $api_base       Provider API base URL.
 * @param string $branch         Branch name.
 * @param string $provider       Provider key.
 * @param string $pat            Personal access token.
 * @param string $post_file_path Repo path of the Markdown file.
 * @return int Attachment post ID, or 0 on failure.
 */
function oribi_sync_sideload_attachment(
    string $src,
    int $post_id,
    string $api_base,
    string $branch,
    string $provider,
    string $pat,
    string $post_file_path
): int {
    if ( $src === '' ) {
        return 0;
    }

    if ( ! function_exists( 'media_handle_sideload' ) ) {
        require_once ABSPATH . 'wp-admin/includes/file.php';
        require_once ABSPATH . 'wp-admin/includes/media.php';
        require_once ABSPATH . 'wp-admin/includes/image.php';
    }

    // Deduplication: check if this src was already imported
    global $wpdb;
    $existing_id = $wpdb->get_var( $wpdb->prepare( // phpcs:ignore WordPress.DB.DirectDatabaseQuery
        "SELECT post_id FROM {$wpdb->postmeta} WHERE meta_key = '_oribi_sync_origin_src' AND meta_value = %s LIMIT 1",
        $src
    ) );
    if ( $existing_id ) {
        return (int) $existing_id;
    }

    // File name without query string or fragment. (strtok() is avoided: it
    // returns false for an empty remainder and keeps hidden global state.)
    $filename = basename( (string) preg_replace( '/[?#].*$/s', '', $src ) );
    if ( $filename === '' ) {
        return 0;
    }

    $tmp_path = null;
    if ( preg_match( '#^https?://#i', $src ) ) {
        // Absolute public URL — use WP's built-in download
        $tmp = download_url( $src, 30 );
        if ( is_wp_error( $tmp ) ) {
            return 0;
        }
        $tmp_path = $tmp;
    } else {
        // Relative repo path — fetch via authenticated API
        $repo_path = oribi_sync_resolve_repo_image_path( $src, $post_file_path );
        if ( empty( $repo_path ) ) {
            return 0;
        }
        $content = oribi_sync_fetch_file( $api_base, $branch, $repo_path, $provider, $pat );
        if ( is_wp_error( $content ) ) {
            return 0;
        }
        $tmp_path = wp_tempnam( $filename );
        if ( ! $tmp_path ) {
            return 0;
        }
        // A failed write must not be sideloaded as an empty or truncated image.
        // phpcs:ignore WordPress.WP.AlternativeFunctions.file_system_operations_file_put_contents
        if ( false === file_put_contents( $tmp_path, (string) $content ) ) {
            // phpcs:ignore WordPress.PHP.NoSilencedErrors.Discouraged
            @unlink( $tmp_path );
            return 0;
        }
    }

    $file_array = [
        'name'     => $filename,
        'tmp_name' => $tmp_path,
    ];
    $att_id = media_handle_sideload( $file_array, $post_id );

    // Best-effort cleanup of the temp file; it may already have been moved.
    // phpcs:ignore WordPress.PHP.NoSilencedErrors.Discouraged
    @unlink( $tmp_path );

    if ( is_wp_error( $att_id ) ) {
        return 0;
    }

    // Store src for deduplication on future syncs
    add_post_meta( $att_id, '_oribi_sync_origin_src', $src, true );

    return (int) $att_id;
}
/**
 * Scan HTML content for <img> tags, sideload each image, and rewrite src attributes.
 *
 * Images are left untouched when they have no src, use a `data:` URI, already
 * point below the uploads base URL, or when sideloading fails.
 *
 * @param int    $post_id        Post the imported attachments are attached to.
 * @param string $html           HTML to scan.
 * @param string $api_base       Provider API base URL.
 * @param string $branch         Branch name.
 * @param string $provider       Provider key.
 * @param string $pat            Personal access token.
 * @param string $post_file_path Repo path of the Markdown file the HTML came from.
 * @return string HTML with src attributes pointing to local media library URLs.
 */
function oribi_sync_import_media_in_content(
    int $post_id,
    string $html,
    string $api_base,
    string $branch,
    string $provider,
    string $pat,
    string $post_file_path
): string {
    if ( empty( $html ) ) {
        return $html;
    }

    // Looked up once instead of once per image.
    $upload_dir   = wp_upload_dir();
    $uploads_base = isset( $upload_dir['baseurl'] ) ? (string) $upload_dir['baseurl'] : '';

    $rewritten = preg_replace_callback(
        '/<img([^>]+)>/i',
        function ( $tag_match ) use ( $post_id, $api_base, $branch, $provider, $pat, $post_file_path, $uploads_base ) {
            $tag   = $tag_match[0];
            $attrs = $tag_match[1];

            // Extract src
            if ( ! preg_match( '/src=["\']([^"\']+)["\']/i', $attrs, $src_match ) ) {
                return $tag;
            }
            $original_src = $src_match[1];

            // Skip data URIs and WP-hosted images (already local)
            if ( strncmp( $original_src, 'data:', 5 ) === 0 ) {
                return $tag;
            }
            if ( $uploads_base !== '' && strpos( $original_src, $uploads_base ) === 0 ) {
                return $tag;
            }

            $att_id = oribi_sync_sideload_attachment(
                $original_src, $post_id, $api_base, $branch, $provider, $pat, $post_file_path
            );
            if ( ! $att_id ) {
                return $tag;
            }
            $local_url = wp_get_attachment_url( $att_id );
            if ( ! $local_url ) {
                return $tag;
            }

            // A callback is used so that "$" or "\" in the URL cannot be
            // misread as a back-reference in the replacement string.
            $new_src   = 'src="' . esc_url( $local_url ) . '"';
            $new_attrs = preg_replace_callback(
                '/src=["\'][^"\']+["\']/i',
                function () use ( $new_src ) {
                    return $new_src;
                },
                $attrs
            );
            if ( $new_attrs === null ) {
                return $tag;
            }

            return '<img' . $new_attrs . '>';
        },
        $html
    );

    // preg_replace_callback() returns null on a PCRE error; fall back to the
    // unmodified input rather than returning null from a string function.
    return $rewritten !== null ? $rewritten : $html;
}
// ─── Posts sync pipeline ──────────────────────────────────────────────────────
/**
 * Sync WordPress posts from the configured posts folder in the repository.
 *
 * Called from oribi_sync_run() after fetching the tree.
 *
 * For every `.md` / `.markdown` file below the posts folder the matching post
 * is looked up (by recorded source path first, then by filename slug), the
 * file is fetched and converted, and the post is created or updated. Files
 * whose tree SHA (or, without a SHA, content checksum) is unchanged are
 * skipped. Afterwards synced posts whose slug no longer corresponds to a
 * file in the repository are trashed.
 *
 * @param string $api_base Provider API base URL.
 * @param string $branch   Branch name.
 * @param string $provider Provider key.
 * @param string $pat      Personal access token.
 * @param array  $tree     Full repo tree (from oribi_sync_fetch_tree).
 * @param bool   $dry_run  If true, report changes without writing.
 * @return array{created: string[], updated: string[], trashed: string[], skipped: string[], errors: string[]}
 */
function oribi_sync_run_posts(
    string $api_base,
    string $branch,
    string $provider,
    string $pat,
    array $tree,
    bool $dry_run = false
): array {
    $result = [
        'created' => [],
        'updated' => [],
        'trashed' => [],
        'skipped' => [],
        'errors'  => [],
    ];

    // Feature gate
    if ( ! get_option( 'oribi_sync_posts_enabled', '' ) ) {
        return $result;
    }

    $posts_folder = get_option( 'oribi_sync_posts_folder', 'posts' ) ?: 'posts';
    $repo_url     = get_option( 'oribi_sync_repo', '' );

    // Filter tree to posts folder (allow recursive subdirectories)
    $post_files = oribi_sync_filter_tree( $tree, $posts_folder, true );
    if ( empty( $post_files ) ) {
        $result['skipped'][] = 'No Markdown files found under ' . $posts_folder . '/ in the repository.';
        return $result;
    }

    // Finds the post that was synced from an exact repo location. Needed for
    // files whose front-matter `slug` differs from the filename-derived slug:
    // those posts cannot be found again by filename slug.
    $find_by_source = static function ( string $source ): ?WP_Post {
        $posts = get_posts( [
            'post_type'      => 'post',
            'post_status'    => [ 'publish', 'draft', 'pending', 'private' ],
            'meta_key'       => '_oribi_sync_source',
            'meta_value'     => $source,
            'posts_per_page' => 1,
        ] );
        return ! empty( $posts ) ? $posts[0] : null;
    };

    // Front-matter values may be lists or booleans; anything that is not a
    // scalar is treated as "not set" where a plain string is required.
    $fm_string = static function ( array $fm, string $key ): string {
        $value = $fm[ $key ] ?? '';
        return is_scalar( $value ) ? (string) $value : '';
    };

    $synced_slugs = [];

    foreach ( $post_files as $entry ) {
        $filename = basename( $entry['path'] );
        $ext      = strtolower( pathinfo( $filename, PATHINFO_EXTENSION ) );

        // Only process Markdown files
        if ( ! in_array( $ext, [ 'md', 'markdown' ], true ) ) {
            continue;
        }

        // Derive slug before fetching content (for fast-path dedup)
        $slug = oribi_sync_post_filename_to_slug( $filename );
        if ( empty( $slug ) ) {
            $result['skipped'][] = $entry['path'] . ' (could not derive slug)';
            continue;
        }

        $source   = $repo_url . '@' . $branch . ':' . $entry['path'];
        $existing = $find_by_source( $source );
        if ( ! $existing ) {
            $existing = oribi_sync_get_synced_post_by_slug( $slug );
        }

        // Remember every slug that belongs to a file still in the repo, both
        // the filename slug and the post's real slug, so the trash pass below
        // never removes a post that uses a custom front-matter slug.
        $synced_slugs[] = $slug;
        if ( $existing ) {
            $synced_slugs[] = $existing->post_name;
        }

        // ── Fast-path: skip identical git SHA ─────────────────────────────
        $git_sha        = $entry['sha'] ?? '';
        $stored_git_sha = $existing ? get_post_meta( $existing->ID, '_oribi_sync_git_sha', true ) : '';
        if ( $existing && ! empty( $git_sha ) && $git_sha === $stored_git_sha ) {
            $result['skipped'][] = $slug . ' (unchanged)';
            if ( ! $dry_run ) {
                update_post_meta( $existing->ID, '_oribi_sync_last_run', current_time( 'mysql' ) );
            }
            continue;
        }

        // ── Fetch raw file ─────────────────────────────────────────────────
        $raw = oribi_sync_fetch_file( $api_base, $branch, $entry['path'], $provider, $pat );
        if ( is_wp_error( $raw ) ) {
            $result['errors'][] = $entry['path'] . ': ' . $raw->get_error_message();
            continue;
        }
        $raw = (string) $raw;

        // ── Parse front-matter + Markdown ──────────────────────────────────
        $parsed = oribi_sync_parse_front_matter( $raw );
        $fm     = $parsed['front_matter'];
        $body   = $parsed['body'];

        // ── Map front-matter to WP post fields ─────────────────────────────
        $fm_title   = $fm_string( $fm, 'title' );
        $fm_slug    = $fm_string( $fm, 'slug' );
        $fm_status  = $fm_string( $fm, 'status' );
        $fm_excerpt = $fm_string( $fm, 'excerpt' );
        $fm_date    = $fm_string( $fm, 'date' );

        $post_title   = ! empty( $fm_title ) ? $fm_title : oribi_sync_slug_to_title( $slug );
        $post_name    = ! empty( $fm_slug ) ? sanitize_title( $fm_slug ) : $slug;
        $post_status  = ! empty( $fm_status ) ? $fm_status : 'publish';
        $post_excerpt = ! empty( $fm_excerpt ) ? $fm_excerpt : '';
        $author_id    = oribi_sync_resolve_author( $fm_string( $fm, 'author' ) );

        // Resolve date: front-matter takes priority, then filename prefix
        $post_date = '';
        if ( ! empty( $fm_date ) ) {
            $ts = strtotime( $fm_date );
            if ( $ts ) {
                $post_date = gmdate( 'Y-m-d H:i:s', $ts );
            }
        } elseif ( preg_match( '/^(\d{4}-\d{2}-\d{2})-/', $filename, $dpm ) ) {
            $ts = strtotime( $dpm[1] );
            if ( $ts ) {
                $post_date = gmdate( 'Y-m-d H:i:s', $ts );
            }
        }

        $categories   = isset( $fm['categories'] ) ? (array) $fm['categories'] : [];
        $tags         = isset( $fm['tags'] ) ? (array) $fm['tags'] : [];
        $featured_img = $fm_string( $fm, 'featured_image' );

        // ── Convert Markdown body to HTML ──────────────────────────────────
        $html_content = oribi_sync_markdown_to_html( $body );
        $checksum     = hash( 'sha256', $raw );

        // ── Dry-run ────────────────────────────────────────────────────────
        if ( $dry_run ) {
            if ( $existing ) {
                $old_checksum = get_post_meta( $existing->ID, '_oribi_sync_checksum', true );
                if ( empty( $git_sha ) && $old_checksum === $checksum ) {
                    $result['skipped'][] = $slug . ' (unchanged)';
                } else {
                    $result['updated'][] = $slug;
                }
            } else {
                $result['created'][] = $slug;
            }
            continue;
        }

        // ── Build WP post array ────────────────────────────────────────────
        $post_arr = [
            'post_title'   => $post_title,
            'post_name'    => $post_name,
            'post_status'  => $post_status,
            'post_type'    => 'post',
            'post_content' => $html_content,
            'post_author'  => $author_id,
            'post_excerpt' => $post_excerpt,
        ];
        if ( $post_date ) {
            $post_arr['post_date']     = $post_date;
            $post_arr['post_date_gmt'] = get_gmt_from_date( $post_date );
        }

        // ── Create or update ───────────────────────────────────────────────
        // wp_insert_post() / wp_update_post() expect slashed input and unslash
        // it before saving; without wp_slash() every backslash in the content
        // (code samples, paths, regexes) would be stripped.
        if ( $existing ) {
            // Checksum fallback for providers without tree SHA
            if ( empty( $git_sha ) ) {
                $old_checksum = get_post_meta( $existing->ID, '_oribi_sync_checksum', true );
                if ( $old_checksum === $checksum ) {
                    $result['skipped'][] = $slug . ' (unchanged)';
                    update_post_meta( $existing->ID, '_oribi_sync_last_run', current_time( 'mysql' ) );
                    continue;
                }
            }
            $post_arr['ID'] = $existing->ID;
            $post_id        = wp_update_post( wp_slash( $post_arr ), true );
            if ( is_wp_error( $post_id ) ) {
                $result['errors'][] = $slug . ': ' . $post_id->get_error_message();
                continue;
            }
            $result['updated'][] = $slug;
        } else {
            $post_id = wp_insert_post( wp_slash( $post_arr ), true );
            if ( is_wp_error( $post_id ) ) {
                $result['errors'][] = $slug . ': ' . $post_id->get_error_message();
                continue;
            }
            $result['created'][] = $slug;
        }

        // The stored slug can differ from both the filename slug and the
        // requested one (front-matter override, uniqueness suffix).
        $synced_slugs[] = (string) get_post_field( 'post_name', $post_id );

        // ── Save sync meta ──────────────────────────────────────────────────
        update_post_meta( $post_id, '_oribi_sync_checksum', $checksum );
        update_post_meta( $post_id, '_oribi_sync_git_sha', $git_sha );
        update_post_meta( $post_id, '_oribi_sync_source', $source );
        update_post_meta( $post_id, '_oribi_sync_last_run', current_time( 'mysql' ) );

        // ── Taxonomies ──────────────────────────────────────────────────────
        if ( ! empty( $categories ) ) {
            $cat_ids = [];
            foreach ( $categories as $cat_name ) {
                $cat_name = trim( (string) $cat_name );
                if ( empty( $cat_name ) ) {
                    continue;
                }
                $term = term_exists( $cat_name, 'category' );
                if ( ! $term ) {
                    $term = wp_insert_term( $cat_name, 'category' );
                }
                if ( ! is_wp_error( $term ) ) {
                    $cat_ids[] = (int) ( is_array( $term ) ? $term['term_id'] : $term );
                }
            }
            if ( ! empty( $cat_ids ) ) {
                wp_set_post_categories( $post_id, $cat_ids );
            }
        }
        if ( ! empty( $tags ) ) {
            $tag_list = array_filter( array_map( 'trim', $tags ) );
            if ( ! empty( $tag_list ) ) {
                wp_set_post_tags( $post_id, $tag_list );
            }
        }

        // ── Media import & URL rewriting ────────────────────────────────────
        $rewritten = oribi_sync_import_media_in_content(
            $post_id, $html_content,
            $api_base, $branch, $provider, $pat,
            $entry['path']
        );
        if ( $rewritten !== $html_content ) {
            wp_update_post( wp_slash( [ 'ID' => $post_id, 'post_content' => $rewritten ] ) );
        }

        // ── Featured image ──────────────────────────────────────────────────
        if ( ! empty( $featured_img ) ) {
            $att_id = oribi_sync_sideload_attachment(
                $featured_img, $post_id,
                $api_base, $branch, $provider, $pat,
                $entry['path']
            );
            if ( $att_id > 0 ) {
                set_post_thumbnail( $post_id, $att_id );
            }
        }
    }

    // ── Trash posts removed from repo ──────────────────────────────────────
    if ( ! $dry_run && ! empty( $synced_slugs ) ) {
        $result['trashed'] = oribi_sync_trash_removed_posts( $synced_slugs );
    }

    return $result;
}
// ─── Trash removed posts ──────────────────────────────────────────────────────
/**
 * Trash WP posts that were previously synced but are no longer in the repo.
 *
 * Every `post` carrying the `_oribi_sync_checksum` meta key (status publish,
 * draft, pending or private) whose slug is not in $current_slugs is moved to
 * the trash.
 *
 * @param string[] $current_slugs Slugs present in the current repo tree.
 * @return string[] Slugs of the posts that were actually trashed.
 */
function oribi_sync_trash_removed_posts( array $current_slugs ): array {
    $trashed = [];

    $query = new WP_Query( [
        'post_type'      => 'post',
        'post_status'    => [ 'publish', 'draft', 'pending', 'private' ],
        'meta_key'       => '_oribi_sync_checksum',
        'posts_per_page' => -1,
        'fields'         => 'ids',
    ] );

    foreach ( $query->posts as $post_id ) {
        $post = get_post( $post_id );
        if ( ! $post || in_array( $post->post_name, $current_slugs, true ) ) {
            continue;
        }
        // wp_trash_post() returns a falsy value when the post could not be
        // trashed; only report posts that really were.
        if ( wp_trash_post( $post->ID ) ) {
            $trashed[] = $post->post_name;
        }
    }

    return $trashed;
}
// ─── Markdown generation (for push) ──────────────────────────────────────────
/**
 * Build the Markdown representation (YAML front-matter + body) of a WP post.
 *
 * Front-matter fields written, in order: title, slug, status, date (date part
 * only), author (user_login), categories, tags, excerpt, featured_image.
 * Optional fields are omitted when the post has no value for them.
 * The body is the post's stored post_content, appended unchanged.
 *
 * @param WP_Post $post
 * @return string Markdown source.
 */
function oribi_sync_generate_post_markdown( WP_Post $post ): string {
    // Front-matter values are single-line: line breaks become spaces.
    $one_line = static function ( $value ) {
        return str_replace( [ "\r", "\n" ], ' ', $value );
    };

    $lines = [
        '---',
        'title: ' . $one_line( $post->post_title ),
        'slug: ' . $post->post_name,
        'status: ' . $post->post_status,
    ];

    // Date — only the YYYY-MM-DD part is emitted.
    $has_date = ! empty( $post->post_date ) && '0000-00-00 00:00:00' !== $post->post_date;
    if ( $has_date ) {
        $lines[] = 'date: ' . substr( $post->post_date, 0, 10 );
    }

    // Author, identified by login name.
    $author = get_user_by( 'id', $post->post_author );
    if ( $author ) {
        $lines[] = 'author: ' . $author->user_login;
    }

    // Categories as a block list.
    $cats = get_the_category( $post->ID );
    if ( ! empty( $cats ) ) {
        $lines[] = 'categories:';
        foreach ( $cats as $cat ) {
            $lines[] = ' - ' . $cat->name;
        }
    }

    // Tags as a block list.
    $post_tags = get_the_tags( $post->ID );
    if ( ! empty( $post_tags ) ) {
        $lines[] = 'tags:';
        foreach ( $post_tags as $tag ) {
            $lines[] = ' - ' . $tag->name;
        }
    }

    // Excerpt
    if ( ! empty( $post->post_excerpt ) ) {
        $lines[] = 'excerpt: ' . $one_line( $post->post_excerpt );
    }

    // Featured image, written as the attachment's URL.
    $thumb_id = get_post_thumbnail_id( $post->ID );
    if ( $thumb_id ) {
        $thumb_url = wp_get_attachment_url( $thumb_id );
        if ( $thumb_url ) {
            $lines[] = 'featured_image: ' . $thumb_url;
        }
    }

    $lines[] = '---';

    // One blank line separates the front-matter from the body.
    return implode( "\n", $lines ) . "\n\n" . $post->post_content;
}
// ─── Push post to repo ────────────────────────────────────────────────────────
/**
 * Push a single WordPress post back to the Git repository as a Markdown file.
 *
 * Gitea / Forgejo only. Flow:
 *   - File does not exist remotely: create it on the configured branch.
 *   - Remote SHA equals the stored SHA (or no SHA is stored): commit directly.
 *   - Remote SHA differs from the stored SHA: commit to a new
 *     `oribi-sync/<slug>-<timestamp>` branch and open a pull request.
 *
 * The target path is taken from the `_oribi_sync_source` meta when it lies
 * inside the configured posts folder, otherwise `<posts folder>/<slug>.md`.
 *
 * @param int   $post_id WP post ID (post_type must be 'post').
 * @param array $opts    Optional: 'message' (commit message).
 * @return array{ok: bool, action: string, message: string, pr_url?: string}
 */
function oribi_sync_push_post( int $post_id, array $opts = [] ): array {
    // Uniform error result.
    $fail = static function ( $message ): array {
        return [ 'ok' => false, 'action' => 'error', 'message' => $message ];
    };

    $post = get_post( $post_id );
    if ( ! $post || $post->post_type !== 'post' ) {
        return $fail( 'Post not found or not of type "post".' );
    }

    $repo_url = get_option( 'oribi_sync_repo', '' );
    $branch   = get_option( 'oribi_sync_branch', 'main' ) ?: 'main';
    $pat      = oribi_sync_get_pat();
    $provider = oribi_sync_get_provider();

    if ( empty( $repo_url ) || empty( $pat ) ) {
        return $fail( 'Repository URL or PAT not configured.' );
    }
    if ( $provider !== 'gitea' ) {
        return $fail( 'Push is currently supported for Gitea / Forgejo only.' );
    }

    $parsed = oribi_sync_parse_repo_url( $repo_url );
    if ( is_wp_error( $parsed ) ) {
        return $fail( $parsed->get_error_message() );
    }
    $api_base = oribi_sync_api_base( $provider, $parsed );

    // ── Determine repo path ───────────────────────────────────────────────────
    // The folder option is normalised once so that the prefix check and the
    // fallback path agree even when it was saved with surrounding slashes.
    $posts_folder  = trim( (string) ( get_option( 'oribi_sync_posts_folder', 'posts' ) ?: 'posts' ), '/' );
    $folder_prefix = $posts_folder !== '' ? $posts_folder . '/' : '';

    $repo_path   = '';
    $source_meta = get_post_meta( $post_id, '_oribi_sync_source', true );
    if ( ! empty( $source_meta ) ) {
        // Stored as "<repo>@<branch>:<path>"; the path follows the last colon.
        $colon_pos = strrpos( $source_meta, ':' );
        if ( $colon_pos !== false && strrpos( substr( $source_meta, 0, $colon_pos ), '@' ) !== false ) {
            $candidate = substr( $source_meta, $colon_pos + 1 );
            if ( strncasecmp( $candidate, $folder_prefix, strlen( $folder_prefix ) ) === 0 ) {
                $repo_path = $candidate;
            }
        }
    }
    if ( empty( $repo_path ) ) {
        $repo_path = $folder_prefix . $post->post_name . '.md';
    }

    $markdown_content = oribi_sync_generate_post_markdown( $post );
    $commit_msg       = $opts['message'] ?? 'Sync: update post ' . $post->post_name . ' from WordPress';
    $new_checksum     = hash( 'sha256', $markdown_content );
    $stored_sha       = get_post_meta( $post_id, '_oribi_sync_git_sha', true );

    // ── Fetch remote file metadata ────────────────────────────────────────────
    $remote = oribi_sync_gitea_get_file_meta( $api_base, $branch, $repo_path, $pat );
    if ( is_wp_error( $remote ) ) {
        return $fail( 'Failed to check remote file: ' . $remote->get_error_message() );
    }

    // ── File doesn't exist: create ────────────────────────────────────────────
    if ( $remote === null ) {
        $result = oribi_sync_gitea_put_file( $api_base, $branch, $repo_path, $markdown_content, $pat, null, $commit_msg );
        if ( is_wp_error( $result ) ) {
            return $fail( 'Create failed: ' . $result->get_error_message() );
        }
        if ( $result['code'] < 200 || $result['code'] >= 300 ) {
            $err = is_array( $result['body'] ) ? ( $result['body']['message'] ?? wp_json_encode( $result['body'] ) ) : $result['body'];
            return $fail( "Create failed (HTTP {$result['code']}): {$err}" );
        }
        $new_sha = $result['body']['content']['sha'] ?? '';
        oribi_sync_update_push_meta( $post_id, $new_sha, $new_checksum, $repo_url, $branch, $repo_path );
        oribi_sync_log_push( $post->post_name, 'created', $branch );
        return [ 'ok' => true, 'action' => 'created', 'message' => "Created {$repo_path} on branch {$branch}." ];
    }

    $remote_sha   = $remote['sha'];
    $has_conflict = ! empty( $stored_sha ) && $remote_sha !== $stored_sha;

    // ── Conflict: create branch + PR ─────────────────────────────────────────
    if ( $has_conflict ) {
        $timestamp  = gmdate( 'Ymd-His' );
        $new_branch = 'oribi-sync/' . $post->post_name . '-' . $timestamp;

        $branch_result = oribi_sync_gitea_create_branch( $api_base, $new_branch, $branch, $pat );
        if ( is_wp_error( $branch_result ) || $branch_result['code'] < 200 || $branch_result['code'] >= 300 ) {
            $msg = is_wp_error( $branch_result )
                ? $branch_result->get_error_message()
                : ( $branch_result['body']['message'] ?? "HTTP {$branch_result['code']}" );
            return $fail( 'Branch creation failed: ' . $msg );
        }

        // The SHA of the file on the new branch is passed along with the commit.
        $branch_remote = oribi_sync_gitea_get_file_meta( $api_base, $new_branch, $repo_path, $pat );
        $branch_sha    = ( ! is_wp_error( $branch_remote ) && $branch_remote !== null ) ? $branch_remote['sha'] : null;

        $put_result = oribi_sync_gitea_put_file( $api_base, $new_branch, $repo_path, $markdown_content, $pat, $branch_sha, $commit_msg );
        if ( is_wp_error( $put_result ) || $put_result['code'] < 200 || $put_result['code'] >= 300 ) {
            $msg = is_wp_error( $put_result )
                ? $put_result->get_error_message()
                : ( $put_result['body']['message'] ?? "HTTP {$put_result['code']}" );
            return $fail( 'Commit to branch failed: ' . $msg );
        }

        $pr_title = 'Sync: post ' . $post->post_name;
        $pr_body  = "Automatic push from WordPress (Oribi Tech Sync).\n\n";
        $pr_body .= "**Post:** {$post->post_title} (`{$post->post_name}`)\n\n";
        $pr_body .= "The target branch `{$branch}` has been modified since the last sync, ";
        $pr_body .= "so this change was pushed to `{$new_branch}` for review.\n";

        $pr_result = oribi_sync_gitea_create_pr( $api_base, $new_branch, $branch, $pr_title, $pr_body, $pat );

        // A failed PR request must not be reported as "pr_created": the commit
        // sits on the new branch, but nobody has been asked to review it.
        if ( is_wp_error( $pr_result ) || $pr_result['code'] < 200 || $pr_result['code'] >= 300 ) {
            $msg = is_wp_error( $pr_result )
                ? $pr_result->get_error_message()
                : ( $pr_result['body']['message'] ?? "HTTP {$pr_result['code']}" );
            return $fail( "Changes were pushed to branch {$new_branch}, but creating the pull request failed: " . $msg );
        }

        $pr_url = $pr_result['body']['html_url'] ?? '';
        if ( $pr_url ) {
            update_post_meta( $post_id, '_oribi_sync_pr_url', $pr_url );
        }
        oribi_sync_log_push( $post->post_name, 'pr_created', $new_branch, $pr_url );
        return [ 'ok' => true, 'action' => 'pr_created', 'message' => "Conflict — created PR on branch {$new_branch}.", 'pr_url' => $pr_url ];
    }

    // ── No conflict: direct update ────────────────────────────────────────────
    $result = oribi_sync_gitea_put_file( $api_base, $branch, $repo_path, $markdown_content, $pat, $remote_sha, $commit_msg );
    if ( is_wp_error( $result ) ) {
        return $fail( 'Update failed: ' . $result->get_error_message() );
    }
    if ( $result['code'] < 200 || $result['code'] >= 300 ) {
        $err = is_array( $result['body'] ) ? ( $result['body']['message'] ?? wp_json_encode( $result['body'] ) ) : $result['body'];
        return $fail( "Update failed (HTTP {$result['code']}): {$err}" );
    }
    $new_sha = $result['body']['content']['sha'] ?? '';
    oribi_sync_update_push_meta( $post_id, $new_sha, $new_checksum, $repo_url, $branch, $repo_path );
    oribi_sync_log_push( $post->post_name, 'updated', $branch );
    return [ 'ok' => true, 'action' => 'updated', 'message' => "Updated {$repo_path} on branch {$branch}." ];
}