From 84f8fc6db5589a4c5280c5d686d43ee85a2457a8 Mon Sep 17 00:00:00 2001 From: Kevin Bataille Date: Mon, 16 Feb 2026 23:50:24 +0100 Subject: [PATCH] Add post migration and author filter features - Add migrate command to transfer posts between websites - Support CSV-based and filtered migration modes - Preserve original post dates (with --ignore-original-date option) - Auto-create categories and tags on destination site - Add author filtering to export (--author and --author-id flags) - Include author_name column in exported CSV - Add comprehensive documentation (MIGRATION_GUIDE.md, AUTHOR_FILTER_GUIDE.md) Co-authored-by: Qwen-Coder --- AUTHOR_FILTER_GUIDE.md | 226 +++++++++ MIGRATION_GUIDE.md | 269 ++++++++++ src/seo/app.py | 118 ++++- src/seo/cli.py | 166 ++++++- src/seo/exporter.py | 146 +++++- src/seo/post_migrator.py | 1007 ++++++++++++++++++++++++++++++++++++++ 6 files changed, 1903 insertions(+), 29 deletions(-) create mode 100644 AUTHOR_FILTER_GUIDE.md create mode 100644 MIGRATION_GUIDE.md create mode 100644 src/seo/post_migrator.py diff --git a/AUTHOR_FILTER_GUIDE.md b/AUTHOR_FILTER_GUIDE.md new file mode 100644 index 0000000..553c45f --- /dev/null +++ b/AUTHOR_FILTER_GUIDE.md @@ -0,0 +1,226 @@ +# Author Filter Guide + +Export posts from specific authors using the enhanced export functionality. 
+ +## Overview + +The export command now supports filtering posts by author name or author ID, making it easy to: +- Export posts from a specific author across all sites +- Combine author filtering with site filtering +- Export posts from multiple authors at once + +## Usage + +### Filter by Author Name + +Export posts from a specific author (case-insensitive, partial match): + +```bash +# Export posts by "John Doe" +./seo export --author "John Doe" + +# Export posts by "admin" (partial match) +./seo export --author admin + +# Export posts from multiple authors +./seo export --author "John Doe" "Jane Smith" +``` + +### Filter by Author ID + +Export posts from specific author IDs: + +```bash +# Export posts by author ID 1 +./seo export --author-id 1 + +# Export posts from multiple author IDs +./seo export --author-id 1 2 3 +``` + +### Combine with Site Filter + +Export posts from a specific author on a specific site: + +```bash +# Export John's posts from mistergeek.net only +./seo export --author "John Doe" --site mistergeek.net + +# Export posts by author ID 1 from webscroll.fr +./seo export --author-id 1 -s webscroll.fr +``` + +### Dry Run Mode + +Preview what would be exported: + +```bash +./seo export --author "John Doe" --dry-run +``` + +## How It Works + +1. **Author Name Matching** + - Case-insensitive matching + - Partial matches work (e.g., "john" matches "John Doe") + - Matches against author's display name and slug + +2. **Author ID Matching** + - Exact match on WordPress user ID + - More reliable than name matching + - Useful when authors have similar names + +3. **Author Information** + - The exporter fetches all authors from each site + - Author names are included in the exported CSV + - Posts are filtered before export + +## Export Output + +The exported CSV includes author information: + +```csv +site,post_id,status,title,slug,url,author_id,author_name,date_published,... 
+mistergeek.net,123,publish,"VPN Guide",vpn-guide,https://...,1,John Doe,2024-01-15,... +``` + +### New Column: `author_name` + +The export now includes the author's display name in addition to the author ID. + +## Examples + +### Example 1: Export All Posts by Admin + +```bash +./seo export --author admin +``` + +Output: `output/all_posts_YYYY-MM-DD.csv` + +### Example 2: Export Specific Author from Specific Site + +```bash +./seo export --author "Marie" --site webscroll.fr +``` + +### Example 3: Export Multiple Authors + +```bash +./seo export --author "John" "Marie" "Admin" +``` + +### Example 4: Export by Author ID + +```bash +./seo export --author-id 5 +``` + +### Example 5: Combine Author and Site Filters + +```bash +./seo export --author "John" --site mistergeek.net --verbose +``` + +## Finding Author IDs + +If you don't know the author ID, you can: + +1. **Export all posts and check the CSV:** + ```bash + ./seo export + # Then open the CSV and check the author_id column + ``` + +2. **Use WordPress Admin:** + - Go to Users → All Users + - Hover over a user name + - The URL shows the user ID (e.g., `user_id=5`) + +3. **Use WordPress REST API directly:** + ```bash + curl -u username:password https://yoursite.com/wp-json/wp/v2/users + ``` + +## Tips + +1. **Use quotes for names with spaces:** + ```bash + ./seo export --author "John Doe" # ✓ Correct + ./seo export --author John Doe # ✗ Wrong (treated as 2 authors) + ``` + +2. **Partial matching is your friend:** + ```bash + ./seo export --author "john" # Matches "John Doe", "Johnny", etc. + ``` + +3. **Combine with migration:** + ```bash + # Export author's posts, then migrate to another site + ./seo export --author "John Doe" --site webscroll.fr + ./seo migrate output/all_posts_*.csv --destination mistergeek.net + ``` + +4.
**Verbose mode for debugging:** + ```bash + ./seo export --author "John" --verbose + ``` + +## Troubleshooting + +### No posts exported + +**Possible causes:** +- Author name doesn't match (try different spelling) +- Author has no posts +- Author doesn't exist on that site + +**Solutions:** +- Use `--verbose` to see what's happening +- Try author ID instead of name +- Check if author exists on the site + +### Author names not showing in CSV + +**Possible causes:** +- WordPress REST API doesn't allow user enumeration +- Authentication issue + +**Solutions:** +- Check WordPress user permissions +- Verify credentials in config +- Author ID will still be present even if name lookup fails + +## API Usage + +Use author filtering programmatically: + +```python +from seo.app import SEOApp + +app = SEOApp() + +# Export by author name +csv_file = app.export(author_filter=["John Doe"]) + +# Export by author ID +csv_file = app.export(author_ids=[1, 2]) + +# Export by author and site +csv_file = app.export( + author_filter=["John"], + site_filter="mistergeek.net" +) +``` + +## Related Commands + +- `seo migrate` - Migrate exported posts to another site +- `seo analyze` - Analyze exported posts with AI +- `seo export --help` - Show all export options + +## See Also + +- [MIGRATION_GUIDE.md](MIGRATION_GUIDE.md) - Post migration guide +- [README.md](README.md) - Main documentation diff --git a/MIGRATION_GUIDE.md b/MIGRATION_GUIDE.md new file mode 100644 index 0000000..dc8b5d3 --- /dev/null +++ b/MIGRATION_GUIDE.md @@ -0,0 +1,269 @@ +# Post Migration Guide + +This guide explains how to migrate posts between WordPress sites using the SEO automation tool. 
+ +## Overview + +The migration feature allows you to move posts from one WordPress site to another while preserving: +- Post content (title, body, excerpt) +- Categories (automatically created if they don't exist) +- Tags (automatically created if they don't exist) +- SEO metadata (RankMath, Yoast SEO) +- Post slug + +## Migration Modes + +There are two ways to migrate posts: + +### 1. CSV-Based Migration + +Migrate specific posts listed in a CSV file. + +**Requirements:** +- CSV file with at least two columns: `site` and `post_id` + +**Usage:** +```bash +# Basic migration (posts deleted from source after migration) +./seo migrate posts_to_migrate.csv --destination mistergeek.net + +# Keep posts on source site +./seo migrate posts_to_migrate.csv --destination mistergeek.net --keep-source + +# Publish immediately instead of draft +./seo migrate posts_to_migrate.csv --destination mistergeek.net --post-status publish + +# Custom output file for migration report +./seo migrate posts_to_migrate.csv --destination mistergeek.net --output custom_report.csv +``` + +### 2. Filtered Migration + +Migrate posts based on filters (category, date, etc.). 
+ +**Usage:** +```bash +# Migrate all posts from source to destination +./seo migrate --source webscroll.fr --destination mistergeek.net + +# Migrate posts from specific categories +./seo migrate --source webscroll.fr --destination mistergeek.net --category-filter VPN "Torrent Clients" + +# Migrate posts with specific tags +./seo migrate --source webscroll.fr --destination mistergeek.net --tag-filter "guide" "tutorial" + +# Migrate posts by date range +./seo migrate --source webscroll.fr --destination mistergeek.net --date-after 2024-01-01 --date-before 2024-12-31 + +# Limit number of posts +./seo migrate --source webscroll.fr --destination mistergeek.net --limit 10 + +# Combine filters +./seo migrate --source webscroll.fr --destination mistergeek.net \ + --category-filter VPN \ + --date-after 2024-01-01 \ + --limit 5 \ + --keep-source +``` + +## Command Options + +### Required Options + +- `--destination`, `--to`: Destination site (mistergeek.net, webscroll.fr, hellogeek.net) +- `--source`, `--from`: Source site (for filtered migration only) +- CSV file: Path to CSV with posts (for CSV-based migration) + +### Optional Options + +| Option | Description | Default | +|--------|-------------|---------| +| `--keep-source` | Keep posts on source site after migration | Delete after migration | +| `--post-status` | Status for migrated posts (draft, publish, pending) | draft | +| `--no-categories` | Don't create categories automatically | Create categories | +| `--no-tags` | Don't create tags automatically | Create tags | +| `--category-filter` | Filter by category names (filtered migration) | All categories | +| `--tag-filter` | Filter by tag names (filtered migration) | All tags | +| `--date-after` | Migrate posts after this date (YYYY-MM-DD) | No limit | +| `--date-before` | Migrate posts before this date (YYYY-MM-DD) | No limit | +| `--limit` | Maximum number of posts to migrate | No limit | +| `--output`, `-o` | Custom output file for migration report | Auto-generated 
| +| `--ignore-original-date` | Use the current date instead of the original post date | Keep original date | +| `--dry-run` | Preview what would be done without doing it | Execute | +| `--verbose`, `-v` | Enable verbose logging | Normal logging | + +## Migration Process + +### What Gets Migrated + +1. **Post Content** + - Title + - Body content (HTML preserved) + - Excerpt + - Slug + +2. **Categories** + - Mapped from source to destination + - Created automatically if they don't exist on destination + - Hierarchical structure preserved (parent-child relationships) + +3. **Tags** + - Mapped from source to destination + - Created automatically if they don't exist on destination + +4. **SEO Metadata** + - RankMath title and description + - Yoast SEO title and description + - Focus keywords + +### What Doesn't Get Migrated + +- Featured images (must be re-uploaded manually) +- Post author (uses destination site's default) +- Comments (not transferred) +- Custom fields (except SEO metadata) +- Post revisions + +## Migration Report + +After migration, a CSV report is generated in `output/` with the following information: + +```csv +source_site,source_post_id,destination_site,destination_post_id,title,status,categories_migrated,tags_migrated,deleted_from_source +webscroll.fr,123,mistergeek.net,456,"VPN Guide",draft,3,5,True +``` + +## Examples + +### Example 1: Migrate Specific Posts from CSV + +1. Create a CSV file with posts to migrate: +```csv +site,post_id,title +webscroll.fr,123,VPN Guide +webscroll.fr,456,Torrent Tutorial +``` + +2.
Run migration: +```bash +./seo migrate my_posts.csv --destination mistergeek.net +``` + +### Example 2: Migrate All VPN Content + +```bash +./seo migrate --source webscroll.fr --destination mistergeek.net \ + --category-filter VPN "VPN Reviews" \ + --post-status draft \ + --keep-source +``` + +### Example 3: Migrate Recent Content + +```bash +./seo migrate --source webscroll.fr --destination mistergeek.net \ + --date-after 2024-06-01 \ + --limit 20 +``` + +### Example 4: Preview Migration + +```bash +./seo migrate --source webscroll.fr --destination mistergeek.net \ + --category-filter VPN \ + --dry-run +``` + +## Best Practices + +### Before Migration + +1. **Backup both sites** - Always backup before bulk operations +2. **Test with a few posts** - Migrate 1-2 posts first to verify +3. **Check category structure** - Review destination site's categories +4. **Plan URL redirects** - If deleting from source, set up redirects + +### During Migration + +1. **Use dry-run first** - Preview what will be migrated +2. **Start with drafts** - Review before publishing +3. **Monitor logs** - Watch for errors or warnings +4. **Limit batch size** - Migrate in batches of 10-20 posts + +### After Migration + +1. **Review migrated posts** - Check formatting and categories +2. **Add featured images** - Manually upload if needed +3. **Set up redirects** - From old URLs to new URLs +4. **Update internal links** - Fix cross-site links +5. **Monitor SEO** - Track rankings after migration + +## Troubleshooting + +### Common Issues + +**1. "Site not found" error** +- Check site name is correct (mistergeek.net, webscroll.fr, hellogeek.net) +- Verify credentials in config.yaml or .env + +**2. "Category already exists" warning** +- This is normal - the migrator found a matching category +- The existing category will be used + +**3. "Failed to create post" error** +- Check WordPress REST API is enabled +- Verify user has post creation permissions +- Check authentication credentials + +**4. 
Posts missing featured images** +- Featured images are not migrated automatically +- Upload images manually to destination site +- Update featured image on migrated posts + +**5. Categories not matching** +- Categories are matched by name (case-insensitive) +- "VPN" and "vpn" will match +- "VPN Guide" and "VPN" will NOT match - new category created + +## API Usage + +You can also use the migration feature programmatically: + +```python +from seo.app import SEOApp + +app = SEOApp() + +# CSV-based migration +app.migrate( + csv_file='output/posts_to_migrate.csv', + destination_site='mistergeek.net', + create_categories=True, + create_tags=True, + delete_after=False, + status='draft' +) + +# Filtered migration +app.migrate_by_filter( + source_site='webscroll.fr', + destination_site='mistergeek.net', + category_filter=['VPN', 'Software'], + date_after='2024-01-01', + limit=10, + create_categories=True, + delete_after=False, + status='draft' +) +``` + +## Related Commands + +- `seo export` - Export posts from all sites +- `seo editorial_strategy` - Analyze and get migration recommendations +- `seo category_propose` - Get AI category recommendations + +## See Also + +- [README.md](README.md) - Main documentation +- [ARCHITECTURE.md](ARCHITECTURE.md) - System architecture +- [CATEGORY_MANAGEMENT_GUIDE.md](CATEGORY_MANAGEMENT_GUIDE.md) - Category management diff --git a/src/seo/app.py b/src/seo/app.py index 684f9eb..e216f7f 100644 --- a/src/seo/app.py +++ b/src/seo/app.py @@ -12,6 +12,7 @@ from .analyzer import EnhancedPostAnalyzer from .category_proposer import CategoryProposer from .category_manager import WordPressCategoryManager, CategoryAssignmentProcessor from .editorial_strategy import EditorialStrategyAnalyzer +from .post_migrator import WordPressPostMigrator logger = logging.getLogger(__name__) @@ -34,11 +35,23 @@ class SEOApp: else: logging.basicConfig(level=logging.INFO) - def export(self) -> str: - """Export all posts from WordPress sites.""" + def export(self, 
author_filter: Optional[List[str]] = None, + author_ids: Optional[List[int]] = None, + site_filter: Optional[str] = None) -> str: + """ + Export all posts from WordPress sites. + + Args: + author_filter: List of author names to filter by + author_ids: List of author IDs to filter by + site_filter: Export from specific site only + + Returns: + Path to exported CSV file + """ logger.info("šŸ“¦ Exporting all posts from WordPress sites...") - exporter = PostExporter() - return exporter.run() + exporter = PostExporter(author_filter=author_filter, author_ids=author_ids) + return exporter.run(site_filter=site_filter) def analyze(self, csv_file: Optional[str] = None, fields: Optional[List[str]] = None, update: bool = False, output: Optional[str] = None) -> str: @@ -146,23 +159,110 @@ class SEOApp: def editorial_strategy(self, csv_file: Optional[str] = None) -> dict: """ Analyze editorial strategy and recommend migrations. - + Args: csv_file: Path to posts CSV (uses latest export if not provided) - + Returns: Analysis results dict """ logger.info("šŸ“Š Analyzing editorial strategy...") - + if not csv_file: csv_file = self._find_latest_export() - + if not csv_file: raise FileNotFoundError("No exported posts found. Run export() first.") - + analyzer = EditorialStrategyAnalyzer() return analyzer.run(csv_file) + + def migrate(self, csv_file: str, destination_site: str, + create_categories: bool = True, create_tags: bool = True, + delete_after: bool = False, status: str = 'draft', + output_file: Optional[str] = None, + ignore_original_date: bool = False) -> str: + """ + Migrate posts from CSV file to destination site. 
+ + Args: + csv_file: Path to CSV file with posts to migrate (must have 'site' and 'post_id' columns) + destination_site: Destination site name (mistergeek.net, webscroll.fr, hellogeek.net) + create_categories: If True, create categories if they don't exist + create_tags: If True, create tags if they don't exist + delete_after: If True, delete posts from source after migration + status: Status for new posts ('draft', 'publish', 'pending') + output_file: Custom output file path for migration report + ignore_original_date: If True, use current date instead of original post date + + Returns: + Path to migration report CSV + """ + logger.info(f"šŸš€ Migrating posts to {destination_site}...") + + migrator = WordPressPostMigrator() + return migrator.migrate_posts_from_csv( + csv_file=csv_file, + destination_site=destination_site, + create_categories=create_categories, + create_tags=create_tags, + delete_after=delete_after, + status=status, + output_file=output_file, + ignore_original_date=ignore_original_date + ) + + def migrate_by_filter(self, source_site: str, destination_site: str, + category_filter: Optional[List[str]] = None, + tag_filter: Optional[List[str]] = None, + date_after: Optional[str] = None, + date_before: Optional[str] = None, + status_filter: Optional[List[str]] = None, + create_categories: bool = True, + create_tags: bool = True, + delete_after: bool = False, + status: str = 'draft', + limit: Optional[int] = None, + ignore_original_date: bool = False) -> str: + """ + Migrate posts based on filters. 
+ + Args: + source_site: Source site name + destination_site: Destination site name + category_filter: List of category names to filter by + tag_filter: List of tag names to filter by + date_after: Only migrate posts after this date (YYYY-MM-DD) + date_before: Only migrate posts before this date (YYYY-MM-DD) + status_filter: List of statuses to filter by (e.g., ['publish', 'draft']) + create_categories: If True, create categories if they don't exist + create_tags: If True, create tags if they don't exist + delete_after: If True, delete posts from source after migration + status: Status for new posts + limit: Maximum number of posts to migrate + ignore_original_date: If True, use current date instead of original post date + + Returns: + Path to migration report CSV + """ + logger.info(f"šŸš€ Migrating posts from {source_site} to {destination_site}...") + + migrator = WordPressPostMigrator() + return migrator.migrate_posts_by_filter( + source_site=source_site, + destination_site=destination_site, + category_filter=category_filter, + tag_filter=tag_filter, + date_after=date_after, + date_before=date_before, + status_filter=status_filter, + create_categories=create_categories, + create_tags=create_tags, + delete_after=delete_after, + status=status, + limit=limit, + ignore_original_date=ignore_original_date + ) def status(self) -> dict: """Get status of output files.""" diff --git a/src/seo/cli.py b/src/seo/cli.py index 2a07efe..f73efac 100644 --- a/src/seo/cli.py +++ b/src/seo/cli.py @@ -37,17 +37,38 @@ Examples: parser.add_argument('args', nargs='*', help='Arguments for the command') parser.add_argument('--verbose', '-v', action='store_true', help='Verbose output') parser.add_argument('--dry-run', action='store_true', help='Show what would be done') - parser.add_argument('--fields', '-f', nargs='+', + parser.add_argument('--fields', '-f', nargs='+', choices=['title', 'meta_description', 'categories', 'site'], help='Fields to analyze') parser.add_argument('--update', 
'-u', action='store_true', help='Update input file') parser.add_argument('--output', '-o', help='Output file path') - parser.add_argument('--confidence', '-c', choices=['High', 'Medium', 'Low'], + parser.add_argument('--confidence', '-c', choices=['High', 'Medium', 'Low'], default='Medium', help='Confidence threshold for category apply') parser.add_argument('--site', '-s', choices=['mistergeek.net', 'webscroll.fr', 'hellogeek.net'], help='WordPress site for category operations') parser.add_argument('--description', '-d', help='Category description') parser.add_argument('--strict', action='store_true', help='Strict confidence matching (exact match only)') + + # Export arguments + parser.add_argument('--author', nargs='+', help='Filter by author name(s) for export') + parser.add_argument('--author-id', type=int, nargs='+', help='Filter by author ID(s) for export') + + # Migration arguments + parser.add_argument('--destination', '--to', choices=['mistergeek.net', 'webscroll.fr', 'hellogeek.net'], + help='Destination site for migration') + parser.add_argument('--source', '--from', choices=['mistergeek.net', 'webscroll.fr', 'hellogeek.net'], + help='Source site for filtered migration') + parser.add_argument('--keep-source', action='store_true', help='Keep posts on source site (default: delete after migration)') + parser.add_argument('--post-status', choices=['draft', 'publish', 'pending'], default='draft', + help='Status for migrated posts (default: draft)') + parser.add_argument('--no-categories', action='store_true', help='Do not create categories automatically') + parser.add_argument('--no-tags', action='store_true', help='Do not create tags automatically') + parser.add_argument('--category-filter', nargs='+', help='Filter by category names (for filtered migration)') + parser.add_argument('--tag-filter', nargs='+', help='Filter by tag names (for filtered migration)') + parser.add_argument('--date-after', help='Migrate posts after this date (YYYY-MM-DD)') + 
parser.add_argument('--date-before', help='Migrate posts before this date (YYYY-MM-DD)') + parser.add_argument('--limit', type=int, help='Limit number of posts to migrate') + parser.add_argument('--ignore-original-date', action='store_true', help='Use current date instead of original post date') args = parser.parse_args() @@ -73,6 +94,7 @@ Examples: 'category_apply': cmd_category_apply, 'category_create': cmd_category_create, 'editorial_strategy': cmd_editorial_strategy, + 'migrate': cmd_migrate, 'status': cmd_status, 'help': cmd_help, } @@ -104,8 +126,19 @@ def cmd_export(app, args): """Export all posts.""" if args.dry_run: print("Would export all posts from WordPress sites") + if args.author: + print(f" Author filter: {args.author}") + if args.author_id: + print(f" Author ID filter: {args.author_id}") return 0 - app.export() + + result = app.export( + author_filter=args.author, + author_ids=args.author_id, + site_filter=args.site + ) + if result: + print(f"āœ… Export completed! Output: {result}") return 0 @@ -241,12 +274,12 @@ def cmd_editorial_strategy(app, args): if args.dry_run: print("Would analyze editorial strategy and recommend migrations") return 0 - + csv_file = args.args[0] if args.args else None - + print("Analyzing editorial strategy...") results = app.editorial_strategy(csv_file=csv_file) - + if results and results.get('report_file'): print(f"\nāœ… Editorial strategy analysis complete!") print(f" Report: {results['report_file']}") @@ -259,6 +292,94 @@ def cmd_editorial_strategy(app, args): return 0 +def cmd_migrate(app, args): + """Migrate posts between websites.""" + if args.dry_run: + print("Would migrate posts between websites") + if args.destination: + print(f" Destination: {args.destination}") + if args.source: + print(f" Source: {args.source}") + return 0 + + # Validate required arguments + if not args.destination: + print("āŒ Destination site required. 
Use --destination mistergeek.net|webscroll.fr|hellogeek.net") + return 1 + + delete_after = not args.keep_source + create_categories = not args.no_categories + create_tags = not args.no_tags + + # Check if using filtered migration or CSV-based migration + if args.source: + # Filtered migration + print(f"Migrating posts from {args.source} to {args.destination}") + print(f"Post status: {args.post_status}") + print(f"Delete after migration: {delete_after}") + if args.category_filter: + print(f"Category filter: {args.category_filter}") + if args.tag_filter: + print(f"Tag filter: {args.tag_filter}") + if args.date_after: + print(f"Date after: {args.date_after}") + if args.date_before: + print(f"Date before: {args.date_before}") + if args.limit: + print(f"Limit: {args.limit}") + + result = app.migrate_by_filter( + source_site=args.source, + destination_site=args.destination, + category_filter=args.category_filter, + tag_filter=args.tag_filter, + date_after=args.date_after, + date_before=args.date_before, + status_filter=None, + create_categories=create_categories, + create_tags=create_tags, + delete_after=delete_after, + status=args.post_status, + limit=args.limit, + ignore_original_date=args.ignore_original_date + ) + + if result: + print(f"\nāœ… Migration completed!") + print(f" Report: {result}") + else: + # CSV-based migration + csv_file = args.args[0] if args.args else None + + if not csv_file: + print("āŒ CSV file required. 
Provide path to CSV with 'site' and 'post_id' columns") + print(" Usage: seo migrate --destination ") + print(" Or use filtered migration: seo migrate --source --destination ") + return 1 + + print(f"Migrating posts from CSV: {csv_file}") + print(f"Destination: {args.destination}") + print(f"Post status: {args.post_status}") + print(f"Delete after migration: {delete_after}") + + result = app.migrate( + csv_file=csv_file, + destination_site=args.destination, + create_categories=create_categories, + create_tags=create_tags, + delete_after=delete_after, + status=args.post_status, + output_file=args.output, + ignore_original_date=args.ignore_original_date + ) + + if result: + print(f"\nāœ… Migration completed!") + print(f" Report: {result}") + + return 0 + + def cmd_status(app, args): """Show status.""" if args.dry_run: @@ -285,6 +406,9 @@ SEO Automation CLI - Available Commands Export & Analysis: export Export all posts from WordPress sites + export --author "John Doe" Export posts by specific author + export --author-id 1 2 Export posts by author IDs + export -s mistergeek.net Export from specific site only analyze [csv_file] Analyze posts with AI analyze -f title Analyze specific fields (title, meta_description, categories, site) analyze -u Update input CSV with new columns (creates backup) @@ -299,11 +423,35 @@ Category Management: Strategy & Migration: editorial_strategy [csv] Analyze editorial lines and recommend migrations editorial_strategy Get migration recommendations between sites + migrate --destination Migrate posts from CSV to destination site + migrate --source --destination Migrate posts with filters + migrate --source A --to B --category-filter "VPN" Migrate specific categories + migrate --source A --to B --date-after 2024-01-01 --limit 10 Utility: status Show output files status help Show this help message +Export Options: + --author Filter by author name(s) (case-insensitive, partial match) + --author-id Filter by author ID(s) + --site, -s Export 
from specific site only + +Migration Options: + --destination, --to Destination site: mistergeek.net, webscroll.fr, hellogeek.net + --source, --from Source site for filtered migration + --keep-source Keep posts on source site (default: delete after migration) + --post-status Status for migrated posts: draft, publish, pending (default: draft) + --no-categories Do not create categories automatically + --no-tags Do not create tags automatically + --category-filter Filter by category names (for filtered migration) + --tag-filter Filter by tag names (for filtered migration) + --date-after Migrate posts after this date (YYYY-MM-DD) + --date-before Migrate posts before this date (YYYY-MM-DD) + --limit Limit number of posts to migrate + --ignore-original-date Use current date instead of original post date + --output, -o Custom output file path for migration report + Options: --verbose, -v Enable verbose logging --dry-run Show what would be done without doing it @@ -317,11 +465,17 @@ Options: Examples: seo export + seo export --author "John Doe" + seo export --author-id 1 2 + seo export -s mistergeek.net --author "admin" seo analyze -f title categories seo category_propose seo category_apply -s mistergeek.net -c Medium seo category_create -s webscroll.fr "Torrent Clients" seo editorial_strategy + seo migrate posts_to_migrate.csv --destination mistergeek.net + seo migrate --source webscroll.fr --destination mistergeek.net --category-filter VPN + seo migrate --source A --to B --date-after 2024-01-01 --limit 10 --keep-source seo status """) return 0 diff --git a/src/seo/exporter.py b/src/seo/exporter.py index 108b7d3..a5554e1 100644 --- a/src/seo/exporter.py +++ b/src/seo/exporter.py @@ -20,11 +20,21 @@ logger = logging.getLogger(__name__) class PostExporter: """Export posts from WordPress sites to CSV.""" - def __init__(self): - """Initialize the exporter.""" + def __init__(self, author_filter: Optional[List[str]] = None, + author_ids: Optional[List[int]] = None): + """ + 
def fetch_authors(self, site_name: str, site_config: Dict) -> Dict[int, Dict]:
    """
    Fetch all authors/users from a WordPress site.

    The users endpoint is paginated (at most 100 entries per request), so
    every page announced by the ``X-WP-TotalPages`` response header is
    walked; otherwise authors beyond the first page would be missing from
    the name filter and from the exported ``author_name`` column.

    The result is stored in ``self.author_cache`` under ``site_name`` and
    returned from there on later calls, even when the fetch failed (the
    cached value is then whatever could be collected, possibly empty).

    Args:
        site_name: Site name, used as cache key and in log messages
        site_config: Site configuration with 'url', 'username' and 'password'

    Returns:
        Dict mapping author ID to author data (id, name, slug, description)
    """
    if site_name in self.author_cache:
        return self.author_cache[site_name]

    logger.info(f" Fetching authors from {site_name}...")
    authors = {}
    base_url = site_config['url'].rstrip('/')
    api_url = f"{base_url}/wp-json/wp/v2/users"
    auth = HTTPBasicAuth(site_config['username'], site_config['password'])

    try:
        page = 1
        while True:
            response = requests.get(
                api_url,
                params={'per_page': 100, 'page': page},
                auth=auth,
                timeout=10
            )
            response.raise_for_status()

            for user in response.json():
                authors[user['id']] = {
                    'id': user['id'],
                    'name': user.get('name', ''),
                    'slug': user.get('slug', ''),
                    'description': user.get('description', '')
                }

            # Stop on the last announced page; a missing or malformed
            # header is treated as "single page" (the previous behaviour).
            try:
                total_pages = int(response.headers.get('X-WP-TotalPages', 1))
            except (TypeError, ValueError):
                total_pages = 1
            if page >= total_pages:
                break
            page += 1

        logger.info(f" āœ“ Fetched {len(authors)} authors")
    except Exception as e:
        # Best effort: author IDs will still be exported, just without
        # names for the authors that could not be fetched.
        logger.warning(f" Could not fetch authors from {site_name}: {e}")

    self.author_cache[site_name] = authors
    return authors
None) -> List[Dict]: + """ + Fetch all posts from a WordPress site. + + Args: + site_name: Site name + site_config: Site configuration + authors_map: Optional authors mapping for filtering + + Returns: + List of post data + """ logger.info(f"\nFetching posts from {site_name}...") posts = [] @@ -59,14 +116,23 @@ class PostExporter: api_url = f"{base_url}/wp-json/wp/v2/posts" auth = HTTPBasicAuth(site_config['username'], site_config['password']) + # Build base params + base_params = {'page': 1, 'per_page': 100, '_embed': True} + + # Add author filter if specified + if self.author_ids: + base_params['author'] = ','.join(map(str, self.author_ids)) + logger.info(f" Filtering by author IDs: {self.author_ids}") + for status in ['publish', 'draft']: page = 1 while True: try: + params = {**base_params, 'page': page, 'status': status} logger.info(f" Fetching page {page} ({status} posts)...") response = requests.get( api_url, - params={'page': page, 'per_page': 100, 'status': status}, + params=params, auth=auth, timeout=10 ) @@ -76,8 +142,29 @@ class PostExporter: if not page_posts: break + # Filter by author name if specified + if self.author_filter and authors_map: + filtered_posts = [] + for post in page_posts: + author_id = post.get('author') + if author_id and author_id in authors_map: + author_name = authors_map[author_id]['name'].lower() + author_slug = authors_map[author_id]['slug'].lower() + + # Check if author matches filter + for filter_name in self.author_filter: + filter_lower = filter_name.lower() + if (filter_lower in author_name or + filter_lower == author_slug): + filtered_posts.append(post) + break + + page_posts = filtered_posts + logger.info(f" āœ“ Got {len(page_posts)} posts after author filter") + posts.extend(page_posts) - logger.info(f" āœ“ Got {len(page_posts)} posts") + if page_posts: + logger.info(f" āœ“ Got {len(page_posts)} posts") page += 1 time.sleep(0.5) @@ -94,7 +181,8 @@ class PostExporter: logger.info(f"āœ“ Total posts from {site_name}: 
{len(posts)}\n") return posts - def extract_post_details(self, post: Dict, site_name: str, category_map: Dict) -> Dict: + def extract_post_details(self, post: Dict, site_name: str, category_map: Dict, + author_map: Optional[Dict[int, Dict]] = None) -> Dict: """Extract post details for CSV export.""" title = post.get('title', {}) if isinstance(title, dict): @@ -122,6 +210,13 @@ class PostExporter: for cat_id in category_ids ]) if category_ids else '' + # Get author name from author map + author_id = post.get('author', '') + author_name = '' + if author_map and author_id: + author_data = author_map.get(author_id, {}) + author_name = author_data.get('name', '') + return { 'site': site_name, 'post_id': post['id'], @@ -129,7 +224,8 @@ class PostExporter: 'title': title.strip(), 'slug': post.get('slug', ''), 'url': post.get('link', ''), - 'author_id': post.get('author', ''), + 'author_id': author_id, + 'author_name': author_name, 'date_published': post.get('date', ''), 'date_modified': post.get('modified', ''), 'categories': category_names, @@ -158,7 +254,7 @@ class PostExporter: return "" fieldnames = [ - 'site', 'post_id', 'status', 'title', 'slug', 'url', 'author_id', + 'site', 'post_id', 'status', 'title', 'slug', 'url', 'author_id', 'author_name', 'date_published', 'date_modified', 'categories', 'tags', 'excerpt', 'content_preview', 'seo_title', 'meta_description', 'focus_keyword', 'word_count', ] @@ -173,24 +269,46 @@ class PostExporter: logger.info(f"āœ“ CSV exported to: {output_file}") return str(output_file) - def run(self) -> str: - """Run the complete export process.""" + def run(self, site_filter: Optional[str] = None) -> str: + """ + Run the complete export process. 
+ + Args: + site_filter: Optional site name to export from (default: all sites) + + Returns: + Path to exported CSV file + """ logger.info("="*70) logger.info("EXPORTING ALL POSTS") logger.info("="*70) + + if self.author_filter: + logger.info(f"Author filter: {self.author_filter}") + if self.author_ids: + logger.info(f"Author IDs: {self.author_ids}") + if site_filter: + logger.info(f"Site filter: {site_filter}") + logger.info("Sites configured: " + ", ".join(self.sites.keys())) for site_name, config in self.sites.items(): + # Skip sites if filter is specified + if site_filter and site_name != site_filter: + logger.info(f"Skipping {site_name} (not in filter)") + continue + categories = self.fetch_category_names(site_name, config) - posts = self.fetch_posts_from_site(site_name, config) + authors = self.fetch_authors(site_name, config) + posts = self.fetch_posts_from_site(site_name, config, authors) if posts: for post in posts: - post_details = self.extract_post_details(post, site_name, categories) + post_details = self.extract_post_details(post, site_name, categories, authors) self.all_posts.append(post_details) if not self.all_posts: - logger.error("No posts found on any site") + logger.warning("No posts found matching criteria") return "" self.all_posts.sort(key=lambda x: (x['site'], x['post_id'])) diff --git a/src/seo/post_migrator.py b/src/seo/post_migrator.py new file mode 100644 index 0000000..4a4e8e2 --- /dev/null +++ b/src/seo/post_migrator.py @@ -0,0 +1,1007 @@ +""" +Post Migrator - Migrate posts between WordPress sites +""" + +import csv +import logging +from pathlib import Path +from datetime import datetime +from typing import Dict, List, Optional, Tuple +import requests +from requests.auth import HTTPBasicAuth + +from .config import Config + +logger = logging.getLogger(__name__) + + +class WordPressPostMigrator: + """Migrate posts between WordPress sites.""" + + def __init__(self): + """Initialize the migrator.""" + self.sites = Config.WORDPRESS_SITES + 
def fetch_categories(self, site_name: str) -> Dict[str, Dict]:
    """
    Return every category of a WordPress site, keyed by lower-cased slug.

    A successful result is stored in ``self.category_cache`` and served
    from there on later calls. Pages of 100 categories are requested until
    an empty or short page is received.

    Returns:
        Dict mapping category slug to category data; an empty dict (not
        cached) when anything goes wrong while fetching.
    """
    try:
        return self.category_cache[site_name]
    except KeyError:
        pass

    logger.info(f"Fetching categories from {site_name}...")

    try:
        base_url, auth = self.get_site_auth(site_name)
        endpoint = f"{base_url}/wp-json/wp/v2/categories"
        by_slug = {}
        page_number = 1

        while True:
            response = requests.get(
                endpoint,
                params={'per_page': 100, 'page': page_number},
                auth=auth,
                timeout=10
            )
            response.raise_for_status()
            batch = response.json()

            if batch:
                by_slug.update({
                    entry['slug'].lower(): {
                        'id': entry['id'],
                        'name': entry['name'],
                        'slug': entry['slug'],
                        'description': entry.get('description', ''),
                        'parent': entry.get('parent', 0),
                        'count': entry.get('count', 0)
                    }
                    for entry in batch
                })

            # An empty or short page means there is nothing left to fetch.
            if not batch or len(batch) < 100:
                break
            page_number += 1

        self.category_cache[site_name] = by_slug
        logger.info(f"āœ“ Fetched {len(by_slug)} categories from {site_name}")
        return by_slug

    except Exception as e:
        logger.error(f"Error fetching categories from {site_name}: {e}")
        return {}
+ + Returns: + Dict mapping tag slug to tag data + """ + if site_name in self.tag_cache: + return self.tag_cache[site_name] + + logger.info(f"Fetching tags from {site_name}...") + + try: + base_url, auth = self.get_site_auth(site_name) + tags = {} + page = 1 + + while True: + response = requests.get( + f"{base_url}/wp-json/wp/v2/tags", + params={'per_page': 100, 'page': page}, + auth=auth, + timeout=10 + ) + response.raise_for_status() + + page_tags = response.json() + if not page_tags: + break + + for tag in page_tags: + tags[tag['slug'].lower()] = { + 'id': tag['id'], + 'name': tag['name'], + 'slug': tag['slug'], + 'description': tag.get('description', ''), + 'count': tag.get('count', 0) + } + + if len(page_tags) < 100: + break + page += 1 + + self.tag_cache[site_name] = tags + logger.info(f"āœ“ Fetched {len(tags)} tags from {site_name}") + return tags + + except Exception as e: + logger.error(f"Error fetching tags from {site_name}: {e}") + return {} + + def get_or_create_category(self, site_name: str, category_name: str, + description: str = '', parent_slug: str = None) -> Optional[int]: + """ + Get existing category or create it if it doesn't exist. 
def create_category(self, site_name: str, category_name: str,
                    description: str = '', parent_slug: str = None) -> Optional[int]:
    """
    Create a new category in WordPress.

    When WordPress answers that the term already exists, the ID reported
    in the error payload is returned directly and recorded in the local
    cache. The previous implementation tried to "refresh" through
    fetch_categories() (which only hands back the cached dict) and then
    re-entered get_or_create_category(), which could call this method
    again and again without ever terminating normally.

    Args:
        site_name: Destination site name
        category_name: Name of the category
        description: Category description
        parent_slug: Parent category slug

    Returns:
        Category ID if successful (created or already existing), None otherwise
    """
    try:
        base_url, auth = self.get_site_auth(site_name)

        # Create slug from name
        slug = category_name.lower().replace(' ', '-').replace('/', '-')

        # Handle parent category (cache keys are lower-cased slugs)
        parent_id = 0
        if parent_slug:
            categories = self.category_cache.get(site_name, {})
            parent = categories.get(parent_slug.lower())
            if parent:
                parent_id = parent['id']

        response = requests.post(
            f"{base_url}/wp-json/wp/v2/categories",
            json={
                'name': category_name,
                'slug': slug,
                'description': description,
                'parent': parent_id
            },
            auth=auth,
            timeout=10
        )

        if response.status_code == 201:
            category_data = response.json()
            logger.info(f"āœ“ Created category '{category_name}' (ID: {category_data['id']})")
            self.stats['categories_created'] += 1

            # Update cache
            if site_name in self.category_cache:
                self.category_cache[site_name][slug] = {
                    'id': category_data['id'],
                    'name': category_data['name'],
                    'slug': slug,
                    'description': description,
                    'parent': parent_id,
                    'count': 0
                }

            return category_data['id']

        if response.status_code == 400:
            error_data = response.json()
            if error_data.get('code') == 'term_exists':
                term_id = error_data.get('data', {}).get('term_id')
                if term_id:
                    logger.info(f" Category '{category_name}' already exists (ID: {term_id})")
                    # Remember the existing term so the next lookup for
                    # this name/slug is answered from the cache.
                    if site_name in self.category_cache:
                        self.category_cache[site_name][slug] = {
                            'id': term_id,
                            'name': category_name,
                            'slug': slug,
                            'description': description,
                            'parent': parent_id,
                            'count': 0
                        }
                    return term_id

            logger.warning(f" Category already exists or error: {error_data}")
            return None

        logger.error(f"Error creating category: {response.status_code} - {response.text}")
        return None

    except Exception as e:
        logger.error(f"Error creating category: {e}")
        return None
def create_tag(self, site_name: str, tag_name: str,
               description: str = '') -> Optional[int]:
    """
    Create a new tag in WordPress.

    When WordPress answers that the term already exists, the ID reported
    in the error payload is returned directly and recorded in the local
    cache. The previous implementation called fetch_tags() to "refresh"
    (which only hands back the cached dict) and then re-entered
    get_or_create_tag(), which could call this method again and again
    without ever terminating normally.

    Args:
        site_name: Destination site name
        tag_name: Name of the tag
        description: Tag description

    Returns:
        Tag ID if successful (created or already existing), None otherwise
    """
    try:
        base_url, auth = self.get_site_auth(site_name)

        # Create slug from name
        slug = tag_name.lower().replace(' ', '-').replace('/', '-')

        response = requests.post(
            f"{base_url}/wp-json/wp/v2/tags",
            json={
                'name': tag_name,
                'slug': slug,
                'description': description
            },
            auth=auth,
            timeout=10
        )

        if response.status_code == 201:
            tag_data = response.json()
            logger.info(f"āœ“ Created tag '{tag_name}' (ID: {tag_data['id']})")
            self.stats['tags_created'] += 1

            # Update cache
            if site_name in self.tag_cache:
                self.tag_cache[site_name][slug] = {
                    'id': tag_data['id'],
                    'name': tag_data['name'],
                    'slug': slug,
                    'description': description,
                    'count': 0
                }

            return tag_data['id']

        if response.status_code == 400:
            error_data = response.json()
            if error_data.get('code') == 'term_exists':
                term_id = error_data.get('data', {}).get('term_id')
                if term_id:
                    logger.info(f" Tag '{tag_name}' already exists (ID: {term_id})")
                    # Remember the existing term so the next lookup for
                    # this name/slug is answered from the cache.
                    if site_name in self.tag_cache:
                        self.tag_cache[site_name][slug] = {
                            'id': term_id,
                            'name': tag_name,
                            'slug': slug,
                            'description': description,
                            'count': 0
                        }
                    return term_id

            logger.warning(f" Tag already exists or error: {error_data}")
            return None

        logger.error(f"Error creating tag: {response.status_code} - {response.text}")
        return None

    except Exception as e:
        logger.error(f"Error creating tag: {e}")
        return None
def map_tags(self, source_tags: List[int], source_site: str,
             destination_site: str) -> List[int]:
    """
    Map source tags to destination site tags.

    Each source tag ID is resolved to its name, then matched
    case-insensitively against the destination tags; tags without a match
    are obtained through get_or_create_tag(). IDs that are unknown on the
    source site are logged and skipped.

    Args:
        source_tags: List of tag IDs from source
        source_site: Source site name
        destination_site: Destination site name

    Returns:
        List of tag IDs for destination
    """
    source_tags_data = self.fetch_tags(source_site)
    dest_tags_data = self.fetch_tags(destination_site)

    # fetch_tags() keys its result by slug, while posts reference tags by
    # numeric ID. Looking the ID up in the slug-keyed dict never matched,
    # so re-index the source tags by ID first.
    source_by_id = {tag['id']: tag for tag in source_tags_data.values()}

    destination_tag_ids = []

    for raw_id in source_tags:
        tag_id = int(raw_id)
        source_tag = source_by_id.get(tag_id)
        if source_tag is None:
            logger.warning(f" Tag ID {tag_id} not found in source")
            continue

        tag_name = source_tag['name']
        wanted = tag_name.lower()

        # Scan the live destination dict on every iteration so that tags
        # added to it while this loop runs are seen as well.
        match = next(
            (tag for tag in dest_tags_data.values() if tag['name'].lower() == wanted),
            None
        )
        if match is not None:
            destination_tag_ids.append(match['id'])
            logger.info(f" Mapped tag '{tag_name}' -> '{match['name']}'")
            continue

        # No match by name: fall back to get-or-create on the destination
        new_tag_id = self.get_or_create_tag(
            destination_site,
            tag_name,
            source_tag.get('description', '')
        )
        if new_tag_id:
            destination_tag_ids.append(new_tag_id)
            logger.info(f" Created tag '{tag_name}' in destination")

    return destination_tag_ids
+ + Args: + post_data: Post data from source + destination_site: Destination site name + category_ids: List of category IDs for destination + tag_ids: List of tag IDs for destination + status: Post status ('draft', 'publish', 'pending') + ignore_original_date: If True, use current date instead of original post date + + Returns: + New post ID if successful, None otherwise + """ + try: + base_url, auth = self.get_site_auth(destination_site) + + # Prepare post data for destination + new_post_data = { + 'title': post_data.get('title', {}).get('rendered', ''), + 'content': post_data.get('content', {}).get('rendered', ''), + 'excerpt': post_data.get('excerpt', {}).get('rendered', ''), + 'status': status, + 'slug': post_data.get('slug', ''), + 'categories': category_ids, + 'tags': tag_ids, + } + + # Handle post dates + if ignore_original_date: + logger.info(f" Using current date (ignoring original date)") + # Don't set date - WordPress will use current time + else: + # Preserve original publication date + date_published = post_data.get('date', None) + if date_published: + new_post_data['date'] = date_published + logger.info(f" Preserving original date: {date_published}") + + # Also preserve modified date if available + date_modified = post_data.get('modified', None) + if date_modified: + new_post_data['modified'] = date_modified + + # Add meta data (SEO fields) if available + meta = post_data.get('meta', {}) + if meta: + new_post_data['meta'] = {} + # Copy common SEO meta fields + seo_fields = [ + 'rank_math_title', + 'rank_math_description', + 'rank_math_focus_keyword', + '_yoast_wpseo_metadesc', + '_yoast_wpseo_focuskw', + '_yoast_wpseo_title', + ] + for field in seo_fields: + if field in meta: + new_post_data['meta'][field] = meta[field] + + logger.info(f"Creating post in {destination_site}...") + + response = requests.post( + f"{base_url}/wp-json/wp/v2/posts", + json=new_post_data, + auth=auth, + timeout=30 + ) + + if response.status_code == 201: + new_post = 
def migrate_post(self, source_site: str, post_id: int, destination_site: str,
                 create_categories: bool = True, create_tags: bool = True,
                 delete_after: bool = False, status: str = 'draft',
                 ignore_original_date: bool = False) -> Optional[Dict]:
    """
    Migrate a single post from source to destination site.

    Updates ``self.stats`` ('total_posts', plus 'migrated' or 'failed').

    Args:
        source_site: Source site name
        post_id: Post ID to migrate
        destination_site: Destination site name
        create_categories: If True, map the post's categories to the destination
        create_tags: If True, map the post's tags to the destination
        delete_after: If True, delete post from source after migration
        status: Status for new post ('draft', 'publish', 'pending')
        ignore_original_date: If True, use current date instead of original post date

    Returns:
        Migration result dict, or None if the post could not be fetched or
        created. 'deleted_from_source' reports whether the source post was
        actually deleted, not merely whether deletion was requested.
    """
    logger.info("\n" + "="*70)
    logger.info(f"MIGRATING POST {post_id}")
    logger.info(f"From: {source_site} -> To: {destination_site}")
    logger.info(f"Ignore original date: {ignore_original_date}")
    logger.info("="*70)

    self.stats['total_posts'] += 1

    # Fetch post from source
    post_data = self.fetch_post_from_source(source_site, post_id)
    if not post_data:
        self.stats['failed'] += 1
        return None

    # Map categories
    category_ids = []
    if create_categories:
        source_category_ids = post_data.get('categories', [])
        if source_category_ids:
            category_ids = self.map_categories(
                source_category_ids, source_site, destination_site
            )
            logger.info(f" Mapped {len(category_ids)} categories")

    # Map tags
    tag_ids = []
    if create_tags:
        source_tag_ids = post_data.get('tags', [])
        if source_tag_ids:
            tag_ids = self.map_tags(
                source_tag_ids, source_site, destination_site
            )
            logger.info(f" Mapped {len(tag_ids)} tags")

    # Create post in destination
    new_post_id = self.create_post_in_destination(
        post_data, destination_site, category_ids, tag_ids, status,
        ignore_original_date=ignore_original_date
    )

    if not new_post_id:
        self.stats['failed'] += 1
        return None

    self.stats['migrated'] += 1

    # Delete source post if requested. Keep the real outcome: a failed
    # deletion leaves the post on both sites and the report must say so.
    deleted = False
    if delete_after:
        deleted = bool(self.delete_source_post(source_site, post_id))
        if deleted:
            logger.info(f"āœ“ Deleted source post {post_id}")
        else:
            logger.warning(
                f"Source post {post_id} was migrated but could not be deleted from {source_site}"
            )

    return {
        'source_site': source_site,
        'source_post_id': post_id,
        'destination_site': destination_site,
        'destination_post_id': new_post_id,
        'title': post_data.get('title', {}).get('rendered', ''),
        'status': status,
        'categories_migrated': len(category_ids),
        'tags_migrated': len(tag_ids),
        'deleted_from_source': deleted,
    }
['site', 'post_id'] + for post in posts_to_migrate: + if 'site' not in post or 'post_id' not in post: + logger.error("CSV must have 'site' and 'post_id' columns") + raise ValueError("CSV must have 'site' and 'post_id' columns") + + # Migrate each post + migration_results = [] + + for i, post in enumerate(posts_to_migrate, 1): + try: + source_site = post['site'] + post_id = int(post['post_id']) + + # Skip if source and destination are the same + if source_site == destination_site: + logger.warning(f"Skipping post {post_id}: source and destination are the same") + self.stats['skipped'] += 1 + continue + + result = self.migrate_post( + source_site=source_site, + post_id=post_id, + destination_site=destination_site, + create_categories=create_categories, + create_tags=create_tags, + delete_after=delete_after, + status=status, + ignore_original_date=ignore_original_date + ) + + if result: + migration_results.append(result) + logger.info(f"āœ“ Migrated post {i}/{len(posts_to_migrate)}") + else: + logger.error(f"āœ— Failed to migrate post {i}/{len(posts_to_migrate)}") + + except Exception as e: + logger.error(f"Error migrating post {post.get('post_id', 'unknown')}: {e}") + self.stats['failed'] += 1 + + # Write migration report + if not output_file: + output_dir = Path(__file__).parent.parent.parent / 'output' + output_dir.mkdir(parents=True, exist_ok=True) + timestamp = datetime.now().strftime('%Y%m%d_%H%M%S') + output_file = output_dir / f'migration_report_{destination_site}_{timestamp}.csv' + + output_file = Path(output_file) + output_file.parent.mkdir(parents=True, exist_ok=True) + + fieldnames = [ + 'source_site', 'source_post_id', 'destination_site', 'destination_post_id', + 'title', 'status', 'categories_migrated', 'tags_migrated', 'deleted_from_source' + ] + + with open(output_file, 'w', newline='', encoding='utf-8') as f: + writer = csv.DictWriter(f, fieldnames=fieldnames) + writer.writeheader() + writer.writerows(migration_results) + + logger.info(f"\nāœ“ 
Migration report saved to: {output_file}") + + # Print summary + logger.info("\n" + "="*70) + logger.info("MIGRATION SUMMARY") + logger.info("="*70) + logger.info(f"Total posts processed: {self.stats['total_posts']}") + logger.info(f"Successfully migrated: {self.stats['migrated']}") + logger.info(f"Failed: {self.stats['failed']}") + logger.info(f"Skipped: {self.stats['skipped']}") + logger.info(f"Categories created: {self.stats['categories_created']}") + logger.info(f"Tags created: {self.stats['tags_created']}") + logger.info("="*70) + + return str(output_file) + + def migrate_posts_by_filter(self, source_site: str, destination_site: str, + category_filter: Optional[List[str]] = None, + tag_filter: Optional[List[str]] = None, + date_after: Optional[str] = None, + date_before: Optional[str] = None, + status_filter: Optional[List[str]] = None, + create_categories: bool = True, + create_tags: bool = True, + delete_after: bool = False, + status: str = 'draft', + limit: Optional[int] = None, + ignore_original_date: bool = False) -> str: + """ + Migrate posts based on filters. 
+ + Args: + source_site: Source site name + destination_site: Destination site name + category_filter: List of category names to filter by + tag_filter: List of tag names to filter by + date_after: Only migrate posts after this date (YYYY-MM-DD) + date_before: Only migrate posts before this date (YYYY-MM-DD) + status_filter: List of statuses to filter by (e.g., ['publish', 'draft']) + create_categories: If True, create categories if they don't exist + create_tags: If True, create tags if they don't exist + delete_after: If True, delete posts from source after migration + status: Status for new posts + limit: Maximum number of posts to migrate + ignore_original_date: If True, use current date instead of original post date + + Returns: + Path to migration report CSV + """ + logger.info("\n" + "="*70) + logger.info("FILTERED POST MIGRATION") + logger.info("="*70) + logger.info(f"Source: {source_site}") + logger.info(f"Destination: {destination_site}") + logger.info(f"Category filter: {category_filter}") + logger.info(f"Tag filter: {tag_filter}") + logger.info(f"Date range: {date_after} to {date_before}") + logger.info(f"Status filter: {status_filter}") + logger.info(f"Limit: {limit}") + logger.info(f"Ignore original date: {ignore_original_date}") + logger.info("="*70) + + # Fetch posts from source + try: + base_url, auth = self.get_site_auth(source_site) + posts_to_migrate = [] + + # Build query parameters + params = { + 'per_page': 100, + 'page': 1, + '_embed': True + } + + if status_filter: + params['status'] = ','.join(status_filter) + else: + params['status'] = 'publish,draft' + + if date_after: + params['after'] = date_after + + if date_before: + params['before'] = date_before + + # Fetch all posts matching filters + while True: + logger.info(f"Fetching page {params['page']} from {source_site}...") + + response = requests.get( + f"{base_url}/wp-json/wp/v2/posts", + params=params, + auth=auth, + timeout=30 + ) + response.raise_for_status() + + page_posts = 
response.json() + if not page_posts: + break + + posts_to_migrate.extend(page_posts) + + if len(page_posts) < 100: + break + if limit and len(posts_to_migrate) >= limit: + break + + params['page'] += 1 + + logger.info(f"Fetched {len(posts_to_migrate)} posts from {source_site}") + + except Exception as e: + logger.error(f"Error fetching posts: {e}") + raise + + if not posts_to_migrate: + logger.warning("No posts found matching filters") + return "" + + # Apply category and tag filters + if category_filter or tag_filter: + source_categories = self.fetch_categories(source_site) + source_tags = self.fetch_tags(source_site) + + # Get category IDs from names + category_ids = [] + if category_filter: + for cat_name in category_filter: + cat_name_lower = cat_name.lower() + for slug, cat_data in source_categories.items(): + if cat_data['name'].lower() == cat_name_lower: + category_ids.append(cat_data['id']) + break + + # Get tag IDs from names + tag_ids = [] + if tag_filter: + for tag_name in tag_filter: + tag_name_lower = tag_name.lower() + for slug, tag_data in source_tags.items(): + if tag_data['name'].lower() == tag_name_lower: + tag_ids.append(tag_data['id']) + break + + # Filter posts + filtered_posts = [] + for post in posts_to_migrate: + post_categories = post.get('categories', []) + post_tags = post.get('tags', []) + + # Check category match + category_match = True + if category_ids: + category_match = any(cat_id in post_categories for cat_id in category_ids) + + # Check tag match + tag_match = True + if tag_ids: + tag_match = any(tag_id in post_tags for tag_id in tag_ids) + + if category_match and tag_match: + filtered_posts.append(post) + + posts_to_migrate = filtered_posts + logger.info(f"Filtered to {len(posts_to_migrate)} posts") + + # Apply limit + if limit: + posts_to_migrate = posts_to_migrate[:limit] + logger.info(f"Limited to {len(posts_to_migrate)} posts") + + # Create temporary CSV for migration + temp_posts = [] + for post in posts_to_migrate: + 
temp_posts.append({ + 'site': source_site, + 'post_id': post['id'], + 'title': post.get('title', {}).get('rendered', ''), + }) + + # Write temporary CSV + temp_csv = Path(__file__).parent.parent.parent / 'output' / 'temp_migration_posts.csv' + temp_csv.parent.mkdir(parents=True, exist_ok=True) + + with open(temp_csv, 'w', newline='', encoding='utf-8') as f: + writer = csv.DictWriter(f, fieldnames=['site', 'post_id', 'title']) + writer.writeheader() + writer.writerows(temp_posts) + + logger.info(f"Created temporary CSV with {len(temp_posts)} posts") + + # Migrate using the CSV method + return self.migrate_posts_from_csv( + csv_file=str(temp_csv), + destination_site=destination_site, + create_categories=create_categories, + create_tags=create_tags, + delete_after=delete_after, + status=status, + ignore_original_date=ignore_original_date + )