Add post migration and author filter features
- Add migrate command to transfer posts between websites
- Support CSV-based and filtered migration modes
- Preserve original post dates (with --ignore-original-date option)
- Auto-create categories and tags on destination site
- Add author filtering to export (--author and --author-id flags)
- Include author_name column in exported CSV
- Add comprehensive documentation (MIGRATION_GUIDE.md, AUTHOR_FILTER_GUIDE.md)

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
This commit is contained in:
118
src/seo/app.py
118
src/seo/app.py
@@ -12,6 +12,7 @@ from .analyzer import EnhancedPostAnalyzer
|
||||
from .category_proposer import CategoryProposer
|
||||
from .category_manager import WordPressCategoryManager, CategoryAssignmentProcessor
|
||||
from .editorial_strategy import EditorialStrategyAnalyzer
|
||||
from .post_migrator import WordPressPostMigrator
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -34,11 +35,23 @@ class SEOApp:
|
||||
else:
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
|
||||
def export(self, author_filter: Optional[List[str]] = None,
           author_ids: Optional[List[int]] = None,
           site_filter: Optional[str] = None) -> str:
    """
    Export all posts from WordPress sites.

    Args:
        author_filter: List of author names to filter by
        author_ids: List of author IDs to filter by
        site_filter: Export from specific site only

    Returns:
        Path to exported CSV file
    """
    logger.info("📦 Exporting all posts from WordPress sites...")
    # Author filters are applied by the exporter itself; the site filter is
    # applied per-site inside run(). The pre-filter code path
    # (PostExporter() / exporter.run() with no arguments) was removed — it
    # returned before the filtered call could ever execute.
    exporter = PostExporter(author_filter=author_filter, author_ids=author_ids)
    return exporter.run(site_filter=site_filter)
|
||||
|
||||
def analyze(self, csv_file: Optional[str] = None, fields: Optional[List[str]] = None,
|
||||
update: bool = False, output: Optional[str] = None) -> str:
|
||||
@@ -146,23 +159,110 @@ class SEOApp:
|
||||
def editorial_strategy(self, csv_file: Optional[str] = None) -> dict:
    """
    Analyze editorial strategy and recommend migrations.

    Args:
        csv_file: Path to posts CSV (uses latest export if not provided)

    Returns:
        Analysis results dict
    """
    logger.info("📊 Analyzing editorial strategy...")

    # Fall back to the most recent export when no CSV path was supplied.
    source_csv = csv_file or self._find_latest_export()
    if not source_csv:
        raise FileNotFoundError("No exported posts found. Run export() first.")

    return EditorialStrategyAnalyzer().run(source_csv)
|
||||
|
||||
def migrate(self, csv_file: str, destination_site: str,
            create_categories: bool = True, create_tags: bool = True,
            delete_after: bool = False, status: str = 'draft',
            output_file: Optional[str] = None,
            ignore_original_date: bool = False) -> str:
    """
    Migrate posts from CSV file to destination site.

    Args:
        csv_file: Path to CSV file with posts to migrate (must have 'site' and 'post_id' columns)
        destination_site: Destination site name (mistergeek.net, webscroll.fr, hellogeek.net)
        create_categories: If True, create categories if they don't exist
        create_tags: If True, create tags if they don't exist
        delete_after: If True, delete posts from source after migration
        status: Status for new posts ('draft', 'publish', 'pending')
        output_file: Custom output file path for migration report
        ignore_original_date: If True, use current date instead of original post date

    Returns:
        Path to migration report CSV
    """
    logger.info(f"🚀 Migrating posts to {destination_site}...")

    # Thin delegator: gather every option and hand the whole job to the migrator.
    options = dict(
        csv_file=csv_file,
        destination_site=destination_site,
        create_categories=create_categories,
        create_tags=create_tags,
        delete_after=delete_after,
        status=status,
        output_file=output_file,
        ignore_original_date=ignore_original_date,
    )
    return WordPressPostMigrator().migrate_posts_from_csv(**options)
|
||||
|
||||
def migrate_by_filter(self, source_site: str, destination_site: str,
                      category_filter: Optional[List[str]] = None,
                      tag_filter: Optional[List[str]] = None,
                      date_after: Optional[str] = None,
                      date_before: Optional[str] = None,
                      status_filter: Optional[List[str]] = None,
                      create_categories: bool = True,
                      create_tags: bool = True,
                      delete_after: bool = False,
                      status: str = 'draft',
                      limit: Optional[int] = None,
                      ignore_original_date: bool = False) -> str:
    """
    Migrate posts based on filters.

    Args:
        source_site: Source site name
        destination_site: Destination site name
        category_filter: List of category names to filter by
        tag_filter: List of tag names to filter by
        date_after: Only migrate posts after this date (YYYY-MM-DD)
        date_before: Only migrate posts before this date (YYYY-MM-DD)
        status_filter: List of statuses to filter by (e.g., ['publish', 'draft'])
        create_categories: If True, create categories if they don't exist
        create_tags: If True, create tags if they don't exist
        delete_after: If True, delete posts from source after migration
        status: Status for new posts
        limit: Maximum number of posts to migrate
        ignore_original_date: If True, use current date instead of original post date

    Returns:
        Path to migration report CSV
    """
    logger.info(f"🚀 Migrating posts from {source_site} to {destination_site}...")

    # Pure pass-through: every filter/option is forwarded unchanged.
    options = dict(
        source_site=source_site,
        destination_site=destination_site,
        category_filter=category_filter,
        tag_filter=tag_filter,
        date_after=date_after,
        date_before=date_before,
        status_filter=status_filter,
        create_categories=create_categories,
        create_tags=create_tags,
        delete_after=delete_after,
        status=status,
        limit=limit,
        ignore_original_date=ignore_original_date,
    )
    return WordPressPostMigrator().migrate_posts_by_filter(**options)
|
||||
|
||||
def status(self) -> dict:
|
||||
"""Get status of output files."""
|
||||
|
||||
166
src/seo/cli.py
166
src/seo/cli.py
@@ -37,17 +37,38 @@ Examples:
|
||||
parser.add_argument('args', nargs='*', help='Arguments for the command')
|
||||
parser.add_argument('--verbose', '-v', action='store_true', help='Verbose output')
|
||||
parser.add_argument('--dry-run', action='store_true', help='Show what would be done')
|
||||
parser.add_argument('--fields', '-f', nargs='+',
|
||||
parser.add_argument('--fields', '-f', nargs='+',
|
||||
choices=['title', 'meta_description', 'categories', 'site'],
|
||||
help='Fields to analyze')
|
||||
parser.add_argument('--update', '-u', action='store_true', help='Update input file')
|
||||
parser.add_argument('--output', '-o', help='Output file path')
|
||||
parser.add_argument('--confidence', '-c', choices=['High', 'Medium', 'Low'],
|
||||
parser.add_argument('--confidence', '-c', choices=['High', 'Medium', 'Low'],
|
||||
default='Medium', help='Confidence threshold for category apply')
|
||||
parser.add_argument('--site', '-s', choices=['mistergeek.net', 'webscroll.fr', 'hellogeek.net'],
|
||||
help='WordPress site for category operations')
|
||||
parser.add_argument('--description', '-d', help='Category description')
|
||||
parser.add_argument('--strict', action='store_true', help='Strict confidence matching (exact match only)')
|
||||
|
||||
# Export arguments
|
||||
parser.add_argument('--author', nargs='+', help='Filter by author name(s) for export')
|
||||
parser.add_argument('--author-id', type=int, nargs='+', help='Filter by author ID(s) for export')
|
||||
|
||||
# Migration arguments
|
||||
parser.add_argument('--destination', '--to', choices=['mistergeek.net', 'webscroll.fr', 'hellogeek.net'],
|
||||
help='Destination site for migration')
|
||||
parser.add_argument('--source', '--from', choices=['mistergeek.net', 'webscroll.fr', 'hellogeek.net'],
|
||||
help='Source site for filtered migration')
|
||||
parser.add_argument('--keep-source', action='store_true', help='Keep posts on source site (default: delete after migration)')
|
||||
parser.add_argument('--post-status', choices=['draft', 'publish', 'pending'], default='draft',
|
||||
help='Status for migrated posts (default: draft)')
|
||||
parser.add_argument('--no-categories', action='store_true', help='Do not create categories automatically')
|
||||
parser.add_argument('--no-tags', action='store_true', help='Do not create tags automatically')
|
||||
parser.add_argument('--category-filter', nargs='+', help='Filter by category names (for filtered migration)')
|
||||
parser.add_argument('--tag-filter', nargs='+', help='Filter by tag names (for filtered migration)')
|
||||
parser.add_argument('--date-after', help='Migrate posts after this date (YYYY-MM-DD)')
|
||||
parser.add_argument('--date-before', help='Migrate posts before this date (YYYY-MM-DD)')
|
||||
parser.add_argument('--limit', type=int, help='Limit number of posts to migrate')
|
||||
parser.add_argument('--ignore-original-date', action='store_true', help='Use current date instead of original post date')
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
@@ -73,6 +94,7 @@ Examples:
|
||||
'category_apply': cmd_category_apply,
|
||||
'category_create': cmd_category_create,
|
||||
'editorial_strategy': cmd_editorial_strategy,
|
||||
'migrate': cmd_migrate,
|
||||
'status': cmd_status,
|
||||
'help': cmd_help,
|
||||
}
|
||||
def cmd_export(app, args):
    """Export all posts.

    Args:
        app: SEOApp instance providing export().
        args: Parsed CLI namespace (dry_run, author, author_id, site).

    Returns:
        0 on success (process exit code convention).
    """
    if args.dry_run:
        print("Would export all posts from WordPress sites")
        if args.author:
            print(f" Author filter: {args.author}")
        if args.author_id:
            print(f" Author ID filter: {args.author_id}")
        return 0

    # The pre-change bare `app.export()` call was removed: it ran an
    # unfiltered export and the filtered call below it was dead code.
    result = app.export(
        author_filter=args.author,
        author_ids=args.author_id,
        site_filter=args.site
    )
    if result:
        print(f"✅ Export completed! Output: {result}")
    return 0
|
||||
|
||||
@@ -241,12 +274,12 @@ def cmd_editorial_strategy(app, args):
|
||||
if args.dry_run:
|
||||
print("Would analyze editorial strategy and recommend migrations")
|
||||
return 0
|
||||
|
||||
|
||||
csv_file = args.args[0] if args.args else None
|
||||
|
||||
|
||||
print("Analyzing editorial strategy...")
|
||||
results = app.editorial_strategy(csv_file=csv_file)
|
||||
|
||||
|
||||
if results and results.get('report_file'):
|
||||
print(f"\n✅ Editorial strategy analysis complete!")
|
||||
print(f" Report: {results['report_file']}")
|
||||
@@ -259,6 +292,94 @@ def cmd_editorial_strategy(app, args):
|
||||
return 0
|
||||
|
||||
|
||||
def cmd_migrate(app, args):
    """Migrate posts between websites.

    Dispatches to filtered migration (--source given) or CSV-based
    migration (positional CSV path). Returns a process exit code.
    """
    if args.dry_run:
        print("Would migrate posts between websites")
        if args.destination:
            print(f" Destination: {args.destination}")
        if args.source:
            print(f" Source: {args.source}")
        return 0

    # A destination site is mandatory for both migration modes.
    if not args.destination:
        print("❌ Destination site required. Use --destination mistergeek.net|webscroll.fr|hellogeek.net")
        return 1

    # CLI flags are negations of the underlying options.
    remove_from_source = not args.keep_source
    make_categories = not args.no_categories
    make_tags = not args.no_tags

    if args.source:
        # Filtered migration: pull posts matching the CLI filters.
        print(f"Migrating posts from {args.source} to {args.destination}")
        print(f"Post status: {args.post_status}")
        print(f"Delete after migration: {remove_from_source}")
        for label, value in (
            ("Category filter", args.category_filter),
            ("Tag filter", args.tag_filter),
            ("Date after", args.date_after),
            ("Date before", args.date_before),
            ("Limit", args.limit),
        ):
            if value:
                print(f"{label}: {value}")

        report = app.migrate_by_filter(
            source_site=args.source,
            destination_site=args.destination,
            category_filter=args.category_filter,
            tag_filter=args.tag_filter,
            date_after=args.date_after,
            date_before=args.date_before,
            status_filter=None,
            create_categories=make_categories,
            create_tags=make_tags,
            delete_after=remove_from_source,
            status=args.post_status,
            limit=args.limit,
            ignore_original_date=args.ignore_original_date
        )
    else:
        # CSV-based migration: first positional argument is the CSV path.
        csv_path = args.args[0] if args.args else None
        if not csv_path:
            print("❌ CSV file required. Provide path to CSV with 'site' and 'post_id' columns")
            print(" Usage: seo migrate <csv_file> --destination <site>")
            print(" Or use filtered migration: seo migrate --source <site> --destination <site>")
            return 1

        print(f"Migrating posts from CSV: {csv_path}")
        print(f"Destination: {args.destination}")
        print(f"Post status: {args.post_status}")
        print(f"Delete after migration: {remove_from_source}")

        report = app.migrate(
            csv_file=csv_path,
            destination_site=args.destination,
            create_categories=make_categories,
            create_tags=make_tags,
            delete_after=remove_from_source,
            status=args.post_status,
            output_file=args.output,
            ignore_original_date=args.ignore_original_date
        )

    if report:
        print(f"\n✅ Migration completed!")
        print(f" Report: {report}")
    return 0
|
||||
|
||||
|
||||
def cmd_status(app, args):
|
||||
"""Show status."""
|
||||
if args.dry_run:
|
||||
@@ -285,6 +406,9 @@ SEO Automation CLI - Available Commands
|
||||
|
||||
Export & Analysis:
|
||||
export Export all posts from WordPress sites
|
||||
export --author "John Doe" Export posts by specific author
|
||||
export --author-id 1 2 Export posts by author IDs
|
||||
export -s mistergeek.net Export from specific site only
|
||||
analyze [csv_file] Analyze posts with AI
|
||||
analyze -f title Analyze specific fields (title, meta_description, categories, site)
|
||||
analyze -u Update input CSV with new columns (creates backup)
|
||||
@@ -299,11 +423,35 @@ Category Management:
|
||||
Strategy & Migration:
|
||||
editorial_strategy [csv] Analyze editorial lines and recommend migrations
|
||||
editorial_strategy Get migration recommendations between sites
|
||||
migrate <csv> --destination <site> Migrate posts from CSV to destination site
|
||||
migrate --source <site> --destination <site> Migrate posts with filters
|
||||
migrate --source A --to B --category-filter "VPN" Migrate specific categories
|
||||
migrate --source A --to B --date-after 2024-01-01 --limit 10
|
||||
|
||||
Utility:
|
||||
status Show output files status
|
||||
help Show this help message
|
||||
|
||||
Export Options:
|
||||
--author Filter by author name(s) (case-insensitive, partial match)
|
||||
--author-id Filter by author ID(s)
|
||||
--site, -s Export from specific site only
|
||||
|
||||
Migration Options:
|
||||
--destination, --to Destination site: mistergeek.net, webscroll.fr, hellogeek.net
|
||||
--source, --from Source site for filtered migration
|
||||
--keep-source Keep posts on source site (default: delete after migration)
|
||||
--post-status Status for migrated posts: draft, publish, pending (default: draft)
|
||||
--no-categories Do not create categories automatically
|
||||
--no-tags Do not create tags automatically
|
||||
--category-filter Filter by category names (for filtered migration)
|
||||
--tag-filter Filter by tag names (for filtered migration)
|
||||
--date-after Migrate posts after this date (YYYY-MM-DD)
|
||||
--date-before Migrate posts before this date (YYYY-MM-DD)
|
||||
--limit Limit number of posts to migrate
|
||||
--ignore-original-date Use current date instead of original post date
|
||||
--output, -o Custom output file path for migration report
|
||||
|
||||
Options:
|
||||
--verbose, -v Enable verbose logging
|
||||
--dry-run Show what would be done without doing it
|
||||
@@ -317,11 +465,17 @@ Options:
|
||||
|
||||
Examples:
|
||||
seo export
|
||||
seo export --author "John Doe"
|
||||
seo export --author-id 1 2
|
||||
seo export -s mistergeek.net --author "admin"
|
||||
seo analyze -f title categories
|
||||
seo category_propose
|
||||
seo category_apply -s mistergeek.net -c Medium
|
||||
seo category_create -s webscroll.fr "Torrent Clients"
|
||||
seo editorial_strategy
|
||||
seo migrate posts_to_migrate.csv --destination mistergeek.net
|
||||
seo migrate --source webscroll.fr --destination mistergeek.net --category-filter VPN
|
||||
seo migrate --source A --to B --date-after 2024-01-01 --limit 10 --keep-source
|
||||
seo status
|
||||
""")
|
||||
return 0
|
||||
|
||||
@@ -20,11 +20,21 @@ logger = logging.getLogger(__name__)
|
||||
class PostExporter:
|
||||
"""Export posts from WordPress sites to CSV."""
|
||||
|
||||
def __init__(self, author_filter: Optional[List[str]] = None,
             author_ids: Optional[List[int]] = None):
    """
    Initialize the exporter.

    Args:
        author_filter: List of author names to filter by (case-insensitive)
        author_ids: List of author IDs to filter by

    The residual pre-change `def __init__(self):` header left above this
    definition by the diff was removed.
    """
    self.sites = Config.WORDPRESS_SITES
    self.all_posts = []          # accumulated post rows across all sites
    self.category_cache = {}     # per-site category id -> data
    self.author_filter = author_filter
    self.author_ids = author_ids
    self.author_cache = {}       # Cache author info by site
|
||||
|
||||
def fetch_category_names(self, site_name: str, site_config: Dict) -> Dict[int, Dict]:
|
||||
"""Fetch category names from a WordPress site."""
|
||||
@@ -50,8 +60,55 @@ class PostExporter:
|
||||
self.category_cache[site_name] = categories
|
||||
return categories
|
||||
|
||||
def fetch_posts_from_site(self, site_name: str, site_config: Dict) -> List[Dict]:
|
||||
"""Fetch all posts from a WordPress site."""
|
||||
def fetch_authors(self, site_name: str, site_config: Dict) -> Dict[int, Dict]:
    """
    Fetch all authors/users from a WordPress site.

    Returns:
        Dict mapping author ID to author data (name, slug)
    """
    # Serve from the per-site cache when available.
    if site_name in self.author_cache:
        return self.author_cache[site_name]

    logger.info(f" Fetching authors from {site_name}...")
    authors: Dict[int, Dict] = {}
    endpoint = site_config['url'].rstrip('/') + "/wp-json/wp/v2/users"
    credentials = HTTPBasicAuth(site_config['username'], site_config['password'])

    try:
        resp = requests.get(endpoint, params={'per_page': 100}, auth=credentials, timeout=10)
        resp.raise_for_status()
        for user in resp.json():
            authors[user['id']] = {
                'id': user['id'],
                'name': user.get('name', ''),
                'slug': user.get('slug', ''),
                'description': user.get('description', ''),
            }
        logger.info(f" ✓ Fetched {len(authors)} authors")
    except Exception as e:
        logger.warning(f" Could not fetch authors from {site_name}: {e}")
        # Best effort: leave the map empty so author IDs still export,
        # just without resolved names.

    self.author_cache[site_name] = authors
    return authors
|
||||
|
||||
def fetch_posts_from_site(self, site_name: str, site_config: Dict,
|
||||
authors_map: Optional[Dict[int, Dict]] = None) -> List[Dict]:
|
||||
"""
|
||||
Fetch all posts from a WordPress site.
|
||||
|
||||
Args:
|
||||
site_name: Site name
|
||||
site_config: Site configuration
|
||||
authors_map: Optional authors mapping for filtering
|
||||
|
||||
Returns:
|
||||
List of post data
|
||||
"""
|
||||
logger.info(f"\nFetching posts from {site_name}...")
|
||||
|
||||
posts = []
|
||||
@@ -59,14 +116,23 @@ class PostExporter:
|
||||
api_url = f"{base_url}/wp-json/wp/v2/posts"
|
||||
auth = HTTPBasicAuth(site_config['username'], site_config['password'])
|
||||
|
||||
# Build base params
|
||||
base_params = {'page': 1, 'per_page': 100, '_embed': True}
|
||||
|
||||
# Add author filter if specified
|
||||
if self.author_ids:
|
||||
base_params['author'] = ','.join(map(str, self.author_ids))
|
||||
logger.info(f" Filtering by author IDs: {self.author_ids}")
|
||||
|
||||
for status in ['publish', 'draft']:
|
||||
page = 1
|
||||
while True:
|
||||
try:
|
||||
params = {**base_params, 'page': page, 'status': status}
|
||||
logger.info(f" Fetching page {page} ({status} posts)...")
|
||||
response = requests.get(
|
||||
api_url,
|
||||
params={'page': page, 'per_page': 100, 'status': status},
|
||||
params=params,
|
||||
auth=auth,
|
||||
timeout=10
|
||||
)
|
||||
@@ -76,8 +142,29 @@ class PostExporter:
|
||||
if not page_posts:
|
||||
break
|
||||
|
||||
# Filter by author name if specified
|
||||
if self.author_filter and authors_map:
|
||||
filtered_posts = []
|
||||
for post in page_posts:
|
||||
author_id = post.get('author')
|
||||
if author_id and author_id in authors_map:
|
||||
author_name = authors_map[author_id]['name'].lower()
|
||||
author_slug = authors_map[author_id]['slug'].lower()
|
||||
|
||||
# Check if author matches filter
|
||||
for filter_name in self.author_filter:
|
||||
filter_lower = filter_name.lower()
|
||||
if (filter_lower in author_name or
|
||||
filter_lower == author_slug):
|
||||
filtered_posts.append(post)
|
||||
break
|
||||
|
||||
page_posts = filtered_posts
|
||||
logger.info(f" ✓ Got {len(page_posts)} posts after author filter")
|
||||
|
||||
posts.extend(page_posts)
|
||||
logger.info(f" ✓ Got {len(page_posts)} posts")
|
||||
if page_posts:
|
||||
logger.info(f" ✓ Got {len(page_posts)} posts")
|
||||
|
||||
page += 1
|
||||
time.sleep(0.5)
|
||||
@@ -94,7 +181,8 @@ class PostExporter:
|
||||
logger.info(f"✓ Total posts from {site_name}: {len(posts)}\n")
|
||||
return posts
|
||||
|
||||
def extract_post_details(self, post: Dict, site_name: str, category_map: Dict) -> Dict:
|
||||
def extract_post_details(self, post: Dict, site_name: str, category_map: Dict,
|
||||
author_map: Optional[Dict[int, Dict]] = None) -> Dict:
|
||||
"""Extract post details for CSV export."""
|
||||
title = post.get('title', {})
|
||||
if isinstance(title, dict):
|
||||
@@ -122,6 +210,13 @@ class PostExporter:
|
||||
for cat_id in category_ids
|
||||
]) if category_ids else ''
|
||||
|
||||
# Get author name from author map
|
||||
author_id = post.get('author', '')
|
||||
author_name = ''
|
||||
if author_map and author_id:
|
||||
author_data = author_map.get(author_id, {})
|
||||
author_name = author_data.get('name', '')
|
||||
|
||||
return {
|
||||
'site': site_name,
|
||||
'post_id': post['id'],
|
||||
@@ -129,7 +224,8 @@ class PostExporter:
|
||||
'title': title.strip(),
|
||||
'slug': post.get('slug', ''),
|
||||
'url': post.get('link', ''),
|
||||
'author_id': post.get('author', ''),
|
||||
'author_id': author_id,
|
||||
'author_name': author_name,
|
||||
'date_published': post.get('date', ''),
|
||||
'date_modified': post.get('modified', ''),
|
||||
'categories': category_names,
|
||||
@@ -158,7 +254,7 @@ class PostExporter:
|
||||
return ""
|
||||
|
||||
fieldnames = [
|
||||
'site', 'post_id', 'status', 'title', 'slug', 'url', 'author_id',
|
||||
'site', 'post_id', 'status', 'title', 'slug', 'url', 'author_id', 'author_name',
|
||||
'date_published', 'date_modified', 'categories', 'tags', 'excerpt',
|
||||
'content_preview', 'seo_title', 'meta_description', 'focus_keyword', 'word_count',
|
||||
]
|
||||
@@ -173,24 +269,46 @@ class PostExporter:
|
||||
logger.info(f"✓ CSV exported to: {output_file}")
|
||||
return str(output_file)
|
||||
|
||||
def run(self) -> str:
|
||||
"""Run the complete export process."""
|
||||
def run(self, site_filter: Optional[str] = None) -> str:
|
||||
"""
|
||||
Run the complete export process.
|
||||
|
||||
Args:
|
||||
site_filter: Optional site name to export from (default: all sites)
|
||||
|
||||
Returns:
|
||||
Path to exported CSV file
|
||||
"""
|
||||
logger.info("="*70)
|
||||
logger.info("EXPORTING ALL POSTS")
|
||||
logger.info("="*70)
|
||||
|
||||
if self.author_filter:
|
||||
logger.info(f"Author filter: {self.author_filter}")
|
||||
if self.author_ids:
|
||||
logger.info(f"Author IDs: {self.author_ids}")
|
||||
if site_filter:
|
||||
logger.info(f"Site filter: {site_filter}")
|
||||
|
||||
logger.info("Sites configured: " + ", ".join(self.sites.keys()))
|
||||
|
||||
for site_name, config in self.sites.items():
|
||||
# Skip sites if filter is specified
|
||||
if site_filter and site_name != site_filter:
|
||||
logger.info(f"Skipping {site_name} (not in filter)")
|
||||
continue
|
||||
|
||||
categories = self.fetch_category_names(site_name, config)
|
||||
posts = self.fetch_posts_from_site(site_name, config)
|
||||
authors = self.fetch_authors(site_name, config)
|
||||
posts = self.fetch_posts_from_site(site_name, config, authors)
|
||||
|
||||
if posts:
|
||||
for post in posts:
|
||||
post_details = self.extract_post_details(post, site_name, categories)
|
||||
post_details = self.extract_post_details(post, site_name, categories, authors)
|
||||
self.all_posts.append(post_details)
|
||||
|
||||
if not self.all_posts:
|
||||
logger.error("No posts found on any site")
|
||||
logger.warning("No posts found matching criteria")
|
||||
return ""
|
||||
|
||||
self.all_posts.sort(key=lambda x: (x['site'], x['post_id']))
|
||||
|
||||
1007
src/seo/post_migrator.py
Normal file
1007
src/seo/post_migrator.py
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user