Add media importer for migrated posts

- Add import_media command to import featured images
- Fetch media from source site (mistergeek.net)
- Upload to destination site (hellogeek.net)
- Map source media IDs to destination media IDs
- Set featured images on migrated posts
- Use migration report CSV as input
- Support dry-run mode
- Cache media mappings to avoid duplicate uploads

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
This commit is contained in:
Kevin Bataille
2026-02-17 01:50:40 +01:00
parent 6ef268ba80
commit 69e4287366
3 changed files with 537 additions and 1 deletions

View File

@@ -17,6 +17,7 @@ from .meta_description_generator import MetaDescriptionGenerator
from .meta_description_updater import MetaDescriptionUpdater from .meta_description_updater import MetaDescriptionUpdater
from .performance_tracker import SEOPerformanceTracker from .performance_tracker import SEOPerformanceTracker
from .performance_analyzer import PerformanceAnalyzer from .performance_analyzer import PerformanceAnalyzer
from .media_importer import WordPressMediaImporter
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -486,3 +487,24 @@ class SEOApp:
report.append("4. Monitor keyword rankings regularly\n") report.append("4. Monitor keyword rankings regularly\n")
return "\n".join(report) return "\n".join(report)
def import_media(self, migration_report: str,
                 source_site: str = 'mistergeek.net',
                 destination_site: str = 'hellogeek.net',
                 dry_run: bool = True) -> Dict:
    """
    Copy featured images for migrated posts from one site to another.

    Thin wrapper: builds a WordPressMediaImporter for the two sites and
    hands it the migration report.

    Args:
        migration_report: Path to the migration report CSV
        source_site: Name of the site the media is read from
        destination_site: Name of the site the media is uploaded to
        dry_run: When True, only preview the work (this is the default)

    Returns:
        Statistics dict produced by the importer
    """
    announcement = f"📸 Importing media from {source_site} to {destination_site}..."
    logger.info(announcement)

    media_importer = WordPressMediaImporter(source_site, destination_site)
    stats = media_importer.run_from_migration_report(
        migration_report,
        dry_run=dry_run,
    )
    return stats

View File

@@ -86,6 +86,10 @@ Examples:
parser.add_argument('--start-date', help='Start date YYYY-MM-DD (for API mode)') parser.add_argument('--start-date', help='Start date YYYY-MM-DD (for API mode)')
parser.add_argument('--end-date', help='End date YYYY-MM-DD (for API mode)') parser.add_argument('--end-date', help='End date YYYY-MM-DD (for API mode)')
# Media import arguments
parser.add_argument('--from-site', help='Source site for media import (default: mistergeek.net)')
parser.add_argument('--to-site', help='Destination site for media import (default: hellogeek.net)')
args = parser.parse_args() args = parser.parse_args()
if not args.command: if not args.command:
@@ -116,6 +120,7 @@ Examples:
'performance': cmd_performance, 'performance': cmd_performance,
'keywords': cmd_keywords, 'keywords': cmd_keywords,
'report': cmd_report, 'report': cmd_report,
'import_media': cmd_import_media,
'status': cmd_status, 'status': cmd_status,
'help': cmd_help, 'help': cmd_help,
} }
@@ -598,6 +603,47 @@ def cmd_report(app, args):
return 0 return 0
def cmd_import_media(app, args):
    """
    CLI handler for ``import_media``.

    In dry-run mode only a short preview is printed and the app is not
    called. Otherwise the first positional argument is taken as the
    migration report path, the import is run through ``app.import_media``
    and a summary of the returned statistics is printed.

    Returns:
        0 on success (including dry run), 1 when no report path was given.
    """
    if args.dry_run:
        preview = [
            "Would import media",
            f" Source: {args.from_site or 'mistergeek.net'}",
            f" Destination: {args.to_site or 'hellogeek.net'}",
        ]
        if args.args:
            preview.append(f" Migration report: {args.args[0]}")
        for line in preview:
            print(line)
        return 0

    report_path = args.args[0] if args.args else None
    if not report_path:
        print("❌ Migration report CSV required")
        print(" Usage: seo import_media <migration_report.csv>")
        return 1

    # Fall back to the default site pair when the flags are not supplied.
    origin = args.from_site or 'mistergeek.net'
    target = args.to_site or 'hellogeek.net'

    print(f"Importing media from {origin} to {target}...")
    print(f"Migration report: {report_path}")

    stats = app.import_media(
        migration_report=report_path,
        source_site=origin,
        destination_site=target,
        dry_run=False
    )
    if not stats:
        return 0

    print(f"\n✅ Media import completed!")
    print(f"\n📊 Summary:")
    summary_rows = (
        ("Total posts", 'total_posts'),
        ("Posts with media", 'posts_with_media'),
        ("Images uploaded", 'images_uploaded'),
        ("Featured images set", 'featured_images_set'),
        ("Errors", 'errors'),
    )
    for label, key in summary_rows:
        print(f" {label}: {stats.get(key, 0)}")
    return 0
def cmd_help(app, args): def cmd_help(app, args):
"""Show help.""" """Show help."""
print(""" print("""
@@ -638,6 +684,7 @@ Utility:
performance --ga4 analytics.csv --gsc search.csv Analyze with both sources performance --ga4 analytics.csv --gsc search.csv Analyze with both sources
keywords <gsc.csv> Show keyword opportunities keywords <gsc.csv> Show keyword opportunities
report Generate SEO performance report report Generate SEO performance report
import_media <report.csv> Import media for migrated posts
help Show this help message help Show this help message
Export Options: Export Options:

467
src/seo/media_importer.py Normal file
View File

@@ -0,0 +1,467 @@
"""
Media Importer - Import media from one WordPress site to another
Specifically designed for migrated posts
"""
import logging
import os
import tempfile
import requests
from requests.auth import HTTPBasicAuth
from pathlib import Path
from datetime import datetime
from typing import Dict, List, Optional, Tuple
import csv
from .config import Config
logger = logging.getLogger(__name__)
class WordPressMediaImporter:
    """
    Import featured images from a source WordPress site to a destination site.

    Both sites are looked up by name in ``Config.WORDPRESS_SITES`` and are
    accessed through the WordPress REST API with HTTP basic auth. For each
    (source post, destination post) pair the importer downloads the source
    post's featured image, uploads it to the destination site and sets it
    as the destination post's featured image.

    Attributes:
        media_cache: Maps source media ID -> destination media ID so that an
            image shared by several posts is uploaded only once.
        stats: Running counters, returned by the ``run_*``/``process_posts``
            entry points.
    """

    # Timeouts in seconds: short for JSON API calls, longer for file transfer.
    _API_TIMEOUT = 10
    _TRANSFER_TIMEOUT = 30

    # Explicit map for common image types; mimetypes.guess_extension() is
    # only a fallback because its answer for some types varies by platform.
    _EXTENSION_BY_MIME = {
        'image/jpeg': '.jpg',
        'image/png': '.png',
        'image/gif': '.gif',
        'image/webp': '.webp',
        'image/avif': '.avif',
        'image/svg+xml': '.svg',
    }

    def __init__(self, source_site: str = 'mistergeek.net',
                 destination_site: str = 'hellogeek.net'):
        """
        Initialize media importer.

        Args:
            source_site: Source site name (key in Config.WORDPRESS_SITES)
            destination_site: Destination site name (key in Config.WORDPRESS_SITES)

        Raises:
            ValueError: If either site name is not configured.
        """
        self.source_site = source_site
        self.destination_site = destination_site
        self.sites = Config.WORDPRESS_SITES

        if source_site not in self.sites:
            raise ValueError(f"Source site '{source_site}' not found")
        if destination_site not in self.sites:
            raise ValueError(f"Destination site '{destination_site}' not found")

        self.source_config = self.sites[source_site]
        self.source_url, self.source_auth = self._endpoint(self.source_config)

        self.dest_config = self.sites[destination_site]
        self.dest_url, self.dest_auth = self._endpoint(self.dest_config)

        self.media_cache: Dict[int, int] = {}
        self.stats = {
            'total_posts': 0,
            'posts_with_media': 0,
            'images_downloaded': 0,
            'images_uploaded': 0,
            'featured_images_set': 0,
            'errors': 0
        }

    @staticmethod
    def _endpoint(site_config: Dict):
        """Return (base URL without trailing slash, basic-auth object) for a site config."""
        base_url = site_config['url'].rstrip('/')
        auth = HTTPBasicAuth(site_config['username'], site_config['password'])
        return base_url, auth

    # ------------------------------------------------------------------
    # Pure helpers (no network, no logging)
    # ------------------------------------------------------------------

    @staticmethod
    def _parse_post_id(raw) -> int:
        """Convert a CSV cell to a positive post ID; return 0 when it is missing or invalid."""
        try:
            value = int(str(raw).strip())
        except (TypeError, ValueError):
            return 0
        return value if value > 0 else 0

    @classmethod
    def _read_mappings(cls, rows) -> Tuple[List[Tuple[int, int]], int]:
        """
        Extract (source_post_id, destination_post_id) pairs from CSV rows.

        Args:
            rows: Iterable of dict-like rows with ``source_post_id`` and
                ``destination_post_id`` columns.

        Returns:
            (mappings, skipped) where ``skipped`` counts rows that did not
            contain two valid positive IDs.
        """
        mappings = []
        skipped = 0
        for row in rows:
            source_id = cls._parse_post_id(row.get('source_post_id'))
            dest_id = cls._parse_post_id(row.get('destination_post_id'))
            if source_id and dest_id:
                mappings.append((source_id, dest_id))
            else:
                skipped += 1
        return mappings, skipped

    @classmethod
    def _derive_filename(cls, media_data: Dict, media_url: str) -> str:
        """
        Choose an upload filename whose extension matches the real file.

        The basename of the download URL is preferred. When the URL has no
        usable extension, one is derived from the media's ``mime_type``;
        when it has no usable name, the media ``slug`` (or ``image``) is used.
        """
        # Function-scope imports: these modules are not imported at file level.
        import mimetypes
        from urllib.parse import unquote, urlsplit

        basename = os.path.basename(unquote(urlsplit(media_url or '').path))
        stem, extension = os.path.splitext(basename)

        if not extension:
            mime_type = media_data.get('mime_type') or 'image/jpeg'
            extension = (cls._EXTENSION_BY_MIME.get(mime_type)
                         or mimetypes.guess_extension(mime_type)
                         or '.jpg')

        # Non-ASCII names in multipart headers are encoded differently across
        # HTTP client versions; fall back to the slug to stay on the safe side.
        try:
            stem.encode('ascii')
        except UnicodeEncodeError:
            stem = ''

        stem = stem or media_data.get('slug') or 'image'
        return f"{stem}{extension}"

    # ------------------------------------------------------------------
    # REST API access
    # ------------------------------------------------------------------

    def fetch_migrated_posts(self, post_ids: Optional[List[int]] = None) -> List[Dict]:
        """
        Fetch posts from the destination site.

        Args:
            post_ids: Specific post IDs to fetch. When omitted, a single page
                of up to 100 published/draft posts is requested instead.

        Returns:
            List of post dicts (posts that could not be fetched are omitted)
        """
        logger.info(f"Fetching posts from {self.destination_site}...")

        if post_ids:
            posts = []
            for post_id in post_ids:
                try:
                    response = requests.get(
                        f"{self.dest_url}/wp-json/wp/v2/posts/{post_id}",
                        auth=self.dest_auth,
                        timeout=self._API_TIMEOUT
                    )
                    if response.status_code == 200:
                        posts.append(response.json())
                    else:
                        logger.warning(
                            f"Post {post_id} not fetched (HTTP {response.status_code})"
                        )
                except Exception as e:
                    # Best effort: one failing post must not abort the batch.
                    logger.error(f"Error fetching post {post_id}: {e}")
            return posts

        try:
            response = requests.get(
                f"{self.dest_url}/wp-json/wp/v2/posts",
                params={
                    'per_page': 100,
                    'status': 'publish,draft',
                    '_embed': True
                },
                auth=self.dest_auth,
                timeout=self._TRANSFER_TIMEOUT
            )
            response.raise_for_status()
            return response.json()
        except Exception as e:
            logger.error(f"Error fetching posts: {e}")
            return []

    def get_source_post(self, post_id: int) -> Optional[Dict]:
        """
        Fetch a post from the source site.

        Args:
            post_id: Post ID on source site

        Returns:
            Post dict, or None when the request fails or is not HTTP 200
        """
        try:
            response = requests.get(
                f"{self.source_url}/wp-json/wp/v2/posts/{post_id}",
                auth=self.source_auth,
                timeout=self._API_TIMEOUT,
                params={'_embed': True}
            )
            if response.status_code == 200:
                return response.json()
            logger.warning(f"Source post {post_id} not found")
            return None
        except Exception as e:
            logger.error(f"Error fetching source post {post_id}: {e}")
            return None

    def download_media(self, media_url: str) -> Optional[bytes]:
        """
        Download a media file.

        Args:
            media_url: URL of media file

        Returns:
            File content bytes, or None when the URL is empty or the
            download fails
        """
        if not media_url:
            logger.error("Error downloading media: empty URL")
            return None
        try:
            response = requests.get(media_url, timeout=self._TRANSFER_TIMEOUT)
            response.raise_for_status()
            return response.content
        except Exception as e:
            logger.error(f"Error downloading {media_url}: {e}")
            return None

    def upload_media(self, file_content: bytes, filename: str,
                     mime_type: str = 'image/jpeg',
                     alt_text: str = '',
                     caption: str = '') -> Optional[int]:
        """
        Upload media to destination site.

        Args:
            file_content: File content bytes
            filename: Filename for the media
            mime_type: MIME type of the file
            alt_text: Alt text for the image
            caption: Caption for the image

        Returns:
            Media ID on destination site, or None when the upload fails.
            A failure to store alt text/caption is logged but does not
            invalidate the upload.
        """
        try:
            # Let requests build the multipart headers itself. Overriding
            # Content-Type here would drop the multipart boundary and the
            # server would store the multipart envelope as the image.
            response = requests.post(
                f"{self.dest_url}/wp-json/wp/v2/media",
                files={'file': (filename, file_content, mime_type)},
                auth=self.dest_auth,
                timeout=self._TRANSFER_TIMEOUT
            )
            if response.status_code != 201:
                logger.error(f"Error uploading {filename}: {response.status_code}")
                return None

            media_id = response.json()['id']

            # The REST media fields are `alt_text` and `caption`; other keys
            # are ignored by the API.
            metadata = {}
            if alt_text:
                metadata['alt_text'] = alt_text
            if caption:
                metadata['caption'] = caption
            if metadata:
                update = requests.post(
                    f"{self.dest_url}/wp-json/wp/v2/media/{media_id}",
                    json=metadata,
                    auth=self.dest_auth,
                    timeout=self._API_TIMEOUT
                )
                if update.status_code != 200:
                    logger.warning(
                        f"Uploaded {filename} but could not set alt text/caption "
                        f"(HTTP {update.status_code})"
                    )

            logger.info(f"✓ Uploaded {filename} (ID: {media_id})")
            return media_id
        except Exception as e:
            # Deliberately broad: a single failed upload must not stop the run.
            logger.error(f"Error uploading {filename}: {e}")
            return None

    # ------------------------------------------------------------------
    # Import logic
    # ------------------------------------------------------------------

    def _transfer_media(self, source_media_id: int) -> Optional[int]:
        """Copy one media item from source to destination; return the new media ID or None."""
        try:
            response = requests.get(
                f"{self.source_url}/wp-json/wp/v2/media/{source_media_id}",
                auth=self.source_auth,
                timeout=self._API_TIMEOUT
            )
            if response.status_code != 200:
                logger.error(f"Could not fetch media {source_media_id}")
                return None
            media_data = response.json()

            # `source_url` is the normal field; fall back to the GUID.
            media_url = (media_data.get('source_url')
                         or (media_data.get('guid') or {}).get('rendered', ''))
            if not media_url:
                logger.error(f"Media {source_media_id} has no downloadable URL")
                return None

            file_content = self.download_media(media_url)
            if not file_content:
                return None
            # Counted here so the number reflects real downloads, even when
            # the subsequent upload fails.
            self.stats['images_downloaded'] += 1

            dest_media_id = self.upload_media(
                file_content,
                self._derive_filename(media_data, media_url),
                media_data.get('mime_type', 'image/jpeg'),
                media_data.get('alt_text', ''),
                (media_data.get('caption') or {}).get('rendered', '')
            )
            if not dest_media_id:
                return None

            self.media_cache[source_media_id] = dest_media_id
            self.stats['images_uploaded'] += 1
            return dest_media_id
        except Exception as e:
            logger.error(f"Error importing featured image: {e}")
            return None

    def _set_featured_media(self, dest_post_id: int, dest_media_id: int) -> bool:
        """Assign an already-uploaded media item as the destination post's featured image."""
        try:
            response = requests.post(
                f"{self.dest_url}/wp-json/wp/v2/posts/{dest_post_id}",
                json={'featured_media': dest_media_id},
                auth=self.dest_auth,
                timeout=self._API_TIMEOUT
            )
            if response.status_code == 200:
                logger.info(f"✓ Set featured image on post {dest_post_id}")
                self.stats['featured_images_set'] += 1
                return True
            logger.error(f"Error setting featured image: {response.status_code}")
            return False
        except Exception as e:
            logger.error(f"Error setting featured image: {e}")
            return False

    def import_featured_image(self, source_post: Dict, dest_post_id: int) -> bool:
        """
        Import featured image from source post to destination post.

        Args:
            source_post: Source post dict
            dest_post_id: Destination post ID

        Returns:
            True if the destination post's featured image was set
        """
        featured_media_id = source_post.get('featured_media')
        if not featured_media_id:
            logger.info(f" No featured image on source post")
            return False

        dest_media_id = self.media_cache.get(featured_media_id)
        if dest_media_id is not None:
            logger.info(f" Using cached media ID: {dest_media_id}")
        else:
            dest_media_id = self._transfer_media(featured_media_id)
            if dest_media_id is None:
                return False

        return self._set_featured_media(dest_post_id, dest_media_id)

    def import_post_media(self, source_post: Dict, dest_post_id: int) -> int:
        """
        Import media for a post. Currently only the featured image is handled.

        Args:
            source_post: Source post dict
            dest_post_id: Destination post ID

        Returns:
            Number of images imported (0 or 1)
        """
        images_imported = 0

        if self.import_featured_image(source_post, dest_post_id):
            images_imported += 1

        # TODO: Import inline images from content
        # This would require parsing the content for <img> tags
        # and replacing source URLs with destination URLs
        return images_imported

    def _log_summary(self) -> None:
        """Log the current statistics as a summary table."""
        logger.info("\n" + "="*70)
        logger.info("IMPORT SUMMARY")
        logger.info("="*70)
        logger.info(f"Total posts: {self.stats['total_posts']}")
        logger.info(f"Posts with media: {self.stats['posts_with_media']}")
        logger.info(f"Images downloaded: {self.stats['images_downloaded']}")
        logger.info(f"Images uploaded: {self.stats['images_uploaded']}")
        logger.info(f"Featured images set: {self.stats['featured_images_set']}")
        logger.info(f"Errors: {self.stats['errors']}")
        logger.info("="*70)

    def process_posts(self, post_mappings: List[Tuple[int, int]],
                      dry_run: bool = False) -> Dict:
        """
        Process media import for mapped posts.

        Args:
            post_mappings: List of (source_post_id, dest_post_id) tuples
            dry_run: If True, only read from the source site and count what
                would be imported; nothing is downloaded or uploaded.

        Returns:
            Statistics dict (the importer's running ``stats``)
        """
        total = len(post_mappings)

        logger.info("\n" + "="*70)
        logger.info("MEDIA IMPORTER")
        logger.info("="*70)
        logger.info(f"Source: {self.source_site}")
        logger.info(f"Destination: {self.destination_site}")
        logger.info(f"Posts to process: {total}")
        logger.info(f"Dry run: {dry_run}")
        logger.info("="*70)

        self.stats['total_posts'] = total

        for i, (source_id, dest_id) in enumerate(post_mappings, 1):
            logger.info(f"\n[{i}/{total}] Processing post mapping:")
            logger.info(f" Source: {source_id} → Destination: {dest_id}")

            source_post = self.get_source_post(source_id)
            if not source_post:
                logger.warning(f" Skipping: Source post not found")
                self.stats['errors'] += 1
                continue

            if not source_post.get('featured_media'):
                logger.info(f" No featured image to import")
                continue

            self.stats['posts_with_media'] += 1

            if dry_run:
                # Projected counts: what a real run would attempt.
                logger.info(f" [DRY RUN] Would import featured image")
                self.stats['images_downloaded'] += 1
                self.stats['images_uploaded'] += 1
                self.stats['featured_images_set'] += 1
                continue

            # The post has a featured image, so importing nothing is a failure.
            if self.import_post_media(source_post, dest_id) == 0:
                self.stats['errors'] += 1

        self._log_summary()
        return self.stats

    def _load_mappings(self, path: str) -> List[Tuple[int, int]]:
        """
        Read post mappings from a CSV file.

        Rows without two valid IDs are skipped (and reported) instead of
        aborting the whole load.

        Raises:
            OSError, ValueError, csv.Error: If the file cannot be read/parsed.
        """
        # utf-8-sig also reads plain UTF-8, and strips a BOM that would
        # otherwise corrupt the first column name.
        with open(path, 'r', encoding='utf-8-sig', newline='') as f:
            mappings, skipped = self._read_mappings(csv.DictReader(f))
        if skipped:
            logger.warning(f"Skipped {skipped} row(s) without valid post IDs")
        return mappings

    def run_from_csv(self, csv_file: str, dry_run: bool = False) -> Dict:
        """
        Import media for posts listed in CSV file.

        CSV should have columns: source_post_id, destination_post_id

        Args:
            csv_file: Path to CSV file with post mappings
            dry_run: If True, preview without importing

        Returns:
            Statistics dict
        """
        logger.info(f"Loading post mappings from: {csv_file}")
        try:
            mappings = self._load_mappings(csv_file)
        except (OSError, ValueError, csv.Error) as e:
            logger.error(f"Error loading CSV: {e}")
            self.stats['errors'] += 1
            return self.stats

        logger.info(f"✓ Loaded {len(mappings)} post mappings")
        return self.process_posts(mappings, dry_run=dry_run)

    def run_from_migration_report(self, report_file: str,
                                  dry_run: bool = False) -> Dict:
        """
        Import media using migration report CSV.

        The report is read for the ``source_post_id`` and
        ``destination_post_id`` columns.

        Args:
            report_file: Path to migration report CSV
            dry_run: If True, preview without importing

        Returns:
            Statistics dict
        """
        logger.info(f"Loading migration report: {report_file}")
        try:
            mappings = self._load_mappings(report_file)
        except (OSError, ValueError, csv.Error) as e:
            logger.error(f"Error loading migration report: {e}")
            self.stats['errors'] += 1
            return self.stats

        logger.info(f"✓ Loaded {len(mappings)} post mappings from migration report")
        return self.process_posts(mappings, dry_run=dry_run)