Add category management - Create and update WordPress categories from AI
New Features:
- Create WordPress categories based on AI proposals
- Bulk assign posts to categories
- Confidence-based filtering (High/Medium/Low)
- Manual category creation
- Dry run mode for safe preview

New Commands:
- seo category_apply - Apply AI proposals to WordPress
- seo category_create - Create new category manually

New Modules:
- src/seo/category_manager.py - WordPress category management
  - WordPressCategoryManager: Create/get categories
  - CategoryAssignmentProcessor: Process AI proposals

Features:
- Automatic category creation if the category doesn't exist
- Bulk category assignment
- Confidence threshold filtering
- Append mode (doesn't replace existing categories)
- Comprehensive error handling
- Detailed statistics and logging

Usage:
  ./seo category_propose                      # Get AI proposals
  ./seo category_apply -s mistergeek.net      # Apply to site
  ./seo category_apply -s site -c High        # High confidence only
  ./seo category_create -s site "New Cat"     # Create category
  ./seo category_apply --dry-run              # Preview changes

Documentation:
- CATEGORY_MANAGEMENT_GUIDE.md - Complete guide

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
This commit is contained in:
408
src/seo/category_manager.py
Normal file
408
src/seo/category_manager.py
Normal file
@@ -0,0 +1,408 @@
|
||||
"""
|
||||
Category Manager - Create, update, and assign categories in WordPress
|
||||
"""
|
||||
|
||||
import csv
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
import requests
|
||||
from requests.auth import HTTPBasicAuth
|
||||
|
||||
from .config import Config
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class WordPressCategoryManager:
    """Manage WordPress categories: create, look up, and assign to posts.

    All network access goes through the WordPress REST API
    (``/wp-json/wp/v2``) using HTTP basic auth built from the per-site
    entries of ``Config.WORDPRESS_SITES``.

    Apart from ``get_site_auth`` (which raises ``ValueError`` for an unknown
    site), the public methods are best-effort: failures are logged and
    reported through the return value (``{}``, ``None`` or ``False``)
    rather than raised.
    """

    # Page size used when listing categories.
    PER_PAGE = 100
    # Timeout, in seconds, applied to every HTTP request.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        """Initialize category manager."""
        self.sites = Config.WORDPRESS_SITES
        # site_name -> {slug: {'id', 'name', 'slug', 'count'}}
        self.category_cache = {}

    @staticmethod
    def _slugify(category_name: str) -> str:
        """Derive the local lookup slug for a category name.

        Lower-cases the name and replaces spaces and ``/`` with ``-``.
        """
        return category_name.lower().replace(' ', '-').replace('/', '-')

    @staticmethod
    def _existing_term_id(payload) -> Optional[int]:
        """Extract the ID of an already-existing category from an error body.

        Two shapes are understood:

        * a dict with ``code == 'term_exists'`` whose ``data`` is either a
          dict carrying ``term_id`` or the bare ID itself;
        * a non-empty list whose first element is a dict with an ``id`` key
          (the shape the previous implementation looked for).

        Returns:
            The category ID, or None when the payload does not identify one.
        """
        if isinstance(payload, dict):
            if payload.get('code') != 'term_exists':
                return None
            data = payload.get('data')
            raw_id = data.get('term_id') if isinstance(data, dict) else data
            if raw_id is None or isinstance(raw_id, bool):
                return None
            try:
                return int(raw_id)
            except (TypeError, ValueError):
                return None

        if isinstance(payload, list) and payload:
            first = payload[0]
            if isinstance(first, dict) and 'id' in first:
                return first['id']

        return None

    def get_site_auth(self, site_name: str) -> Tuple[str, "HTTPBasicAuth"]:
        """Get site URL and auth for a given site name.

        Args:
            site_name: Key into the configured sites mapping.

        Returns:
            Tuple of (base URL without trailing slash, basic-auth object).

        Raises:
            ValueError: If ``site_name`` is not configured.
        """
        site_config = self.sites.get(site_name)
        if not site_config:
            raise ValueError(f"Site not found: {site_name}")

        base_url = site_config['url'].rstrip('/')
        auth = HTTPBasicAuth(site_config['username'], site_config['password'])
        return base_url, auth

    def fetch_categories(self, site_name: str) -> Dict[str, Dict]:
        """Fetch all categories from a WordPress site.

        Results are cached per site; a cached site is returned without any
        network access.  A failed fetch is not cached.

        Args:
            site_name: Site to list categories for.

        Returns:
            Dict mapping lower-cased category slug to an info dict with the
            keys ``id``, ``name``, ``slug`` and ``count``.  Empty dict if the
            fetch failed.
        """
        if site_name in self.category_cache:
            return self.category_cache[site_name]

        logger.info(f"Fetching categories from {site_name}...")

        try:
            base_url, auth = self.get_site_auth(site_name)
            categories = {}
            page = 1

            while True:
                response = requests.get(
                    f"{base_url}/wp-json/wp/v2/categories",
                    params={'per_page': self.PER_PAGE, 'page': page},
                    auth=auth,
                    timeout=self.REQUEST_TIMEOUT,
                )
                response.raise_for_status()

                page_categories = response.json()
                if not page_categories:
                    break

                for cat in page_categories:
                    categories[cat['slug'].lower()] = {
                        'id': cat['id'],
                        'name': cat['name'],
                        'slug': cat['slug'],
                        'count': cat.get('count', 0),
                    }

                # A short page means this was the last one.
                if len(page_categories) < self.PER_PAGE:
                    break
                page += 1

            self.category_cache[site_name] = categories
            logger.info(f"✓ Fetched {len(categories)} categories from {site_name}")
            return categories

        except Exception as e:
            # Best-effort contract: report the failure and return nothing.
            logger.error(f"Error fetching categories from {site_name}: {e}")
            return {}

    def create_category(self, site_name: str, category_name: str,
                        description: str = '', parent_id: int = 0) -> Optional[int]:
        """Create a new category in WordPress.

        If the site answers that the category already exists, the ID of the
        existing category is returned instead of failing.

        Args:
            site_name: Site to create category on
            category_name: Name of the category
            description: Category description
            parent_id: Parent category ID (0 for top-level)

        Returns:
            Category ID if created or already present, None otherwise
        """
        if not category_name or not category_name.strip():
            logger.error("Error creating category: empty category name")
            return None

        try:
            base_url, auth = self.get_site_auth(site_name)
            slug = self._slugify(category_name)

            logger.info(f"Creating category '{category_name}' on {site_name}...")

            response = requests.post(
                f"{base_url}/wp-json/wp/v2/categories",
                json={
                    'name': category_name,
                    'slug': slug,
                    'description': description,
                    'parent': parent_id,
                },
                auth=auth,
                timeout=self.REQUEST_TIMEOUT,
            )

            if response.status_code == 201:
                category_data = response.json()
                logger.info(f"✓ Created category '{category_name}' (ID: {category_data['id']})")
                self._remember(
                    site_name, slug,
                    category_id=category_data['id'],
                    name=category_data['name'],
                    # Keep the slug the server actually stored, if reported.
                    stored_slug=category_data.get('slug', slug),
                )
                return category_data['id']

            # Duplicate categories are reported as an error response that
            # carries the existing term's ID; 409 is kept for compatibility
            # with the previous handling.
            if response.status_code in (400, 409):
                existing_id = self._existing_term_id(response.json())
                if existing_id is not None:
                    logger.info(f"  Category '{category_name}' already exists")
                    self._remember(
                        site_name, slug,
                        category_id=existing_id,
                        name=category_name,
                        stored_slug=slug,
                    )
                    return existing_id

            logger.error(f"Error creating category: {response.status_code} - {response.text}")
            return None

        except Exception as e:
            # Best-effort contract: report the failure and return nothing.
            logger.error(f"Error creating category: {e}")
            return None

    def _remember(self, site_name: str, lookup_slug: str, category_id: int,
                  name: str, stored_slug: str) -> None:
        """Record a category in the cache under the locally derived slug.

        Only an already-populated site cache is updated: creating the site
        entry here would make ``fetch_categories`` treat a partial cache as
        the complete category list.
        """
        site_cache = self.category_cache.get(site_name)
        if site_cache is None:
            return
        site_cache[lookup_slug] = {
            'id': category_id,
            'name': name,
            'slug': stored_slug,
            # The real post count is not known at this point.
            'count': 0,
        }

    def get_or_create_category(self, site_name: str, category_name: str,
                               description: str = '') -> Optional[int]:
        """Get existing category or create it if it doesn't exist.

        The lookup is by locally derived slug against the cached category
        list (fetched on first use); on a miss the category is created.

        Args:
            site_name: Site to work with
            category_name: Name of the category
            description: Category description (used if creating)

        Returns:
            Category ID, or None if it could be neither found nor created
        """
        if site_name not in self.category_cache:
            self.fetch_categories(site_name)

        # Still absent from the cache if the fetch failed.
        categories = self.category_cache.get(site_name, {})

        # Second candidate keeps '/' in the slug (the original alternative
        # format); it equals the first whenever the name has no '/'.
        candidates = (
            self._slugify(category_name),
            category_name.lower().replace(' ', '-'),
        )
        for candidate in candidates:
            if candidate in categories:
                found_id = categories[candidate]['id']
                logger.info(f"✓ Found existing category '{category_name}' (ID: {found_id})")
                return found_id

        return self.create_category(site_name, category_name, description)

    def assign_post_to_category(self, site_name: str, post_id: int,
                                category_id: int, append: bool = True) -> bool:
        """Assign a post to a category.

        Args:
            site_name: Site where post exists
            post_id: Post ID
            category_id: Category ID to assign
            append: If True, add to existing categories; if False, replace all

        Returns:
            True if the post is in the category afterwards, False otherwise
        """
        try:
            base_url, auth = self.get_site_auth(site_name)
            post_url = f"{base_url}/wp-json/wp/v2/posts/{post_id}"

            if append:
                # Read the current categories so they are preserved.
                response = requests.get(
                    post_url, auth=auth, timeout=self.REQUEST_TIMEOUT
                )
                if response.status_code != 200:
                    logger.error(f"Could not fetch post {post_id}")
                    return False

                existing = list(response.json().get('categories') or [])
                if category_id in existing:
                    # Nothing to change; skip the redundant write.
                    logger.info(f"✓ Post {post_id} already in category {category_id}")
                    return True
                new_categories = existing + [category_id]
            else:
                new_categories = [category_id]

            response = requests.post(
                post_url,
                json={'categories': new_categories},
                auth=auth,
                timeout=self.REQUEST_TIMEOUT,
            )

            if response.status_code == 200:
                logger.info(f"✓ Assigned post {post_id} to category {category_id}")
                return True

            logger.error(f"Error assigning category: {response.status_code}")
            return False

        except Exception as e:
            # Best-effort contract: report the failure and return False.
            logger.error(f"Error assigning category: {e}")
            return False

    def bulk_assign_categories(self, site_name: str,
                               post_category_map: Dict[int, List[int]]) -> Dict[str, int]:
        """Bulk assign posts to categories.

        Each (post, category) pair is assigned in append mode and counted
        individually.

        Args:
            site_name: Site to work with
            post_category_map: Dict mapping post_id to list of category_ids

        Returns:
            Statistics dict with ``success`` and ``failed`` counts
        """
        stats = {'success': 0, 'failed': 0}

        logger.info(f"Bulk assigning categories on {site_name}...")

        for post_id, category_ids in post_category_map.items():
            for category_id in category_ids:
                if self.assign_post_to_category(site_name, post_id, category_id):
                    stats['success'] += 1
                else:
                    stats['failed'] += 1

        logger.info(f"✓ Bulk assignment complete: {stats['success']} successful, {stats['failed']} failed")
        return stats
|
||||
|
||||
|
||||
class CategoryAssignmentProcessor:
    """Process AI category proposals and apply them to WordPress.

    Proposals are rows of a CSV file.  The columns read here are
    ``post_id``, ``proposed_category`` and ``category_confidence``.
    Counters in ``processing_stats`` live on the instance and accumulate
    across calls to ``process_proposals``.
    """

    # Ranking used to compare confidence labels against the threshold.
    CONFIDENCE_ORDER = {'High': 3, 'Medium': 2, 'Low': 1}
    # Rank assumed for a missing or unrecognised label ("Medium").
    DEFAULT_CONFIDENCE_RANK = 2

    def __init__(self):
        """Initialize processor."""
        self.category_manager = WordPressCategoryManager()
        self.processing_stats = {
            'total_posts': 0,
            'categories_created': 0,
            'posts_updated': 0,
            'errors': 0,
        }

    @staticmethod
    def _parse_post_id(raw_value) -> int:
        """Convert a CSV cell to a positive post ID.

        Returns:
            The ID as an int, or 0 when the cell is missing, empty,
            non-numeric or not positive, so that callers can treat 0 as
            "no usable post_id" instead of crashing on bad rows.
        """
        try:
            post_id = int(str(raw_value).strip())
        except (TypeError, ValueError):
            return 0
        return post_id if post_id > 0 else 0

    @classmethod
    def _confidence_rank(cls, label) -> int:
        """Map a confidence label to its numeric rank.

        Matching ignores case and surrounding whitespace; a missing or
        unknown label gets the default ("Medium") rank.
        """
        normalized = str(label or '').strip().capitalize()
        return cls.CONFIDENCE_ORDER.get(normalized, cls.DEFAULT_CONFIDENCE_RANK)

    def load_proposals(self, proposals_csv: str) -> List[Dict]:
        """Load category proposals from CSV.

        Args:
            proposals_csv: Path to the proposals CSV file.

        Returns:
            One dict per CSV row keyed by header name; an empty list if the
            file cannot be read or parsed.
        """
        logger.info(f"Loading proposals from: {proposals_csv}")

        try:
            # utf-8-sig also reads plain UTF-8 but drops a leading BOM that
            # would otherwise become part of the first header name;
            # newline='' is what the csv module expects for file objects.
            with open(proposals_csv, 'r', encoding='utf-8-sig', newline='') as f:
                proposals = list(csv.DictReader(f))
        except (OSError, UnicodeError, csv.Error) as e:
            logger.error(f"Error loading proposals: {e}")
            return []

        logger.info(f"✓ Loaded {len(proposals)} proposals")
        return proposals

    def process_proposals(self, proposals: List[Dict], site_name: str,
                          confidence_threshold: str = 'Medium',
                          dry_run: bool = False) -> Dict[str, int]:
        """Process AI category proposals and apply to WordPress.

        Rows below the confidence threshold are dropped.  For each remaining
        row the proposed category is looked up or created and the post is
        added to it in append mode.  Rows with an unusable ``post_id`` or an
        empty ``proposed_category`` are counted as errors and skipped.

        Args:
            proposals: List of proposal dicts from CSV
            site_name: Site to apply changes to
            confidence_threshold: Minimum confidence to apply (High, Medium, Low)
            dry_run: If True, don't actually make changes

        Returns:
            Statistics dict (the instance's ``processing_stats``)
        """
        logger.info("\n" + "="*70)
        logger.info("PROCESSING CATEGORY PROPOSALS")
        logger.info("="*70)

        if dry_run:
            logger.info("DRY RUN - No changes will be made")

        # Filter by confidence
        min_confidence = self._confidence_rank(confidence_threshold)
        filtered_proposals = [
            p for p in proposals
            if self._confidence_rank(p.get('category_confidence')) >= min_confidence
        ]

        logger.info(f"Filtered to {len(filtered_proposals)} proposals (confidence >= {confidence_threshold})")

        # Warm the category cache once up front.
        self.category_manager.fetch_categories(site_name)

        total = len(filtered_proposals)
        for i, proposal in enumerate(filtered_proposals, 1):
            logger.info(f"\n[{i}/{total}] Processing post {proposal.get('post_id')}...")

            post_id = self._parse_post_id(proposal.get('post_id'))
            proposed_category = (proposal.get('proposed_category') or '').strip()
            confidence = proposal.get('category_confidence') or 'Medium'

            if not post_id or not proposed_category:
                logger.warning("  Skipping: Missing post_id or proposed_category")
                self.processing_stats['errors'] += 1
                continue

            if dry_run:
                logger.info(f"  Would assign to: {proposed_category}")
                continue

            category_id = self.category_manager.get_or_create_category(
                site_name,
                proposed_category,
                description=f"AI-proposed category (confidence: {confidence})",
            )

            if not category_id:
                self.processing_stats['errors'] += 1
                logger.error(f"  Failed to get/create category '{proposed_category}'")
                continue

            # Counts categories that were found as well as newly created.
            self.processing_stats['categories_created'] += 1

            if self.category_manager.assign_post_to_category(
                site_name, post_id, category_id, append=True
            ):
                self.processing_stats['posts_updated'] += 1
                logger.info(f"  ✓ Assigned to '{proposed_category}'")
            else:
                self.processing_stats['errors'] += 1

        self.processing_stats['total_posts'] = total

        # Print summary
        logger.info("\n" + "="*70)
        logger.info("PROCESSING SUMMARY")
        logger.info("="*70)
        logger.info(f"Total proposals processed: {self.processing_stats['total_posts']}")
        logger.info(f"Categories created/found: {self.processing_stats['categories_created']}")
        logger.info(f"Posts updated: {self.processing_stats['posts_updated']}")
        logger.info(f"Errors: {self.processing_stats['errors']}")

        return self.processing_stats

    def run(self, proposals_csv: str, site_name: str,
            confidence_threshold: str = 'Medium',
            dry_run: bool = False) -> Dict[str, int]:
        """Run complete category assignment process.

        Args:
            proposals_csv: Path to proposals CSV
            site_name: Site to apply changes to
            confidence_threshold: Minimum confidence to apply
            dry_run: If True, preview changes without applying

        Returns:
            Statistics dict; unchanged if no proposals could be loaded
        """
        proposals = self.load_proposals(proposals_csv)

        if not proposals:
            logger.error("No proposals to process")
            return self.processing_stats

        return self.process_proposals(
            proposals,
            site_name,
            confidence_threshold,
            dry_run,
        )
|
||||
Reference in New Issue
Block a user