Add category management - Create and update WordPress categories from AI proposals

New Features:
- Create WordPress categories based on AI proposals
- Bulk assign posts to categories
- Confidence-based filtering (High/Medium/Low)
- Manual category creation
- Dry run mode for safe preview

New Commands:
- seo category_apply - Apply AI proposals to WordPress
- seo category_create - Create new category manually

New Modules:
- src/seo/category_manager.py - WordPress category management
  - WordPressCategoryManager: Create/get categories
  - CategoryAssignmentProcessor: Process AI proposals

Features:
- Automatic category creation if the category doesn't exist
- Bulk category assignment
- Confidence threshold filtering
- Append mode (doesn't replace existing categories)
- Comprehensive error handling
- Detailed statistics and logging

Usage:
./seo category_propose                    # Get AI proposals
./seo category_apply -s mistergeek.net    # Apply to site
./seo category_apply -s site -c High      # High confidence only
./seo category_create -s site "New Cat"   # Create category
./seo category_apply --dry-run            # Preview changes

Documentation:
- CATEGORY_MANAGEMENT_GUIDE.md - Complete guide

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
This commit is contained in:
Kevin Bataille
2026-02-16 15:25:33 +01:00
parent c8fb141cdd
commit 3c4b72c0ea
5 changed files with 824 additions and 3 deletions

408
src/seo/category_manager.py Normal file
View File

@@ -0,0 +1,408 @@
"""
Category Manager - Create, update, and assign categories in WordPress
"""
import csv
import logging
from pathlib import Path
from datetime import datetime
from typing import Dict, List, Optional, Tuple
import requests
from requests.auth import HTTPBasicAuth
from .config import Config
logger = logging.getLogger(__name__)


class WordPressCategoryManager:
    """Manage WordPress categories: fetch, create, and assign to posts.

    All calls go to the WordPress REST API (``/wp-json/wp/v2``) using HTTP
    basic auth with the per-site credentials from ``Config.WORDPRESS_SITES``.
    Network and configuration failures are logged and reported through the
    return value (``{}``, ``None`` or ``False``) instead of being raised.
    """

    # Seconds to wait for any single REST request.
    REQUEST_TIMEOUT = 10
    # Page size for category listings (the REST API caps ``per_page`` at 100).
    PAGE_SIZE = 100

    def __init__(self):
        """Initialize category manager."""
        self.sites = Config.WORDPRESS_SITES
        # site name -> {slug: {'id', 'name', 'slug', 'count'}}
        self.category_cache = {}

    @staticmethod
    def _slugify(category_name: str) -> str:
        """Return the local slug guess for *category_name*.

        Only lowercases and turns spaces and slashes into hyphens. WordPress
        sanitises slugs further on its side, so this is a best-effort key and
        may differ from the slug the server actually stores.
        """
        return category_name.lower().replace(' ', '-').replace('/', '-')

    @classmethod
    def _find_cached(cls, categories: Dict[str, Dict],
                     category_name: str) -> Optional[Dict]:
        """Find *category_name* in a site's cached categories.

        Tries the local slug guess first, then falls back to a
        case-insensitive comparison of display names so that categories whose
        server-side slug differs from the guess are still found.
        """
        import html  # local import: only needed for this lookup

        entry = categories.get(cls._slugify(category_name))
        if entry is not None:
            return entry
        wanted = category_name.strip().casefold()
        for entry in categories.values():
            # Names may come back HTML-escaped (e.g. "&amp;"), so unescape
            # before comparing.
            cached_name = html.unescape(str(entry.get('name', '')))
            if cached_name.strip().casefold() == wanted:
                return entry
        return None

    @staticmethod
    def _existing_term_id(response) -> Optional[int]:
        """Return the ID of the already-existing term named by a failed create.

        WordPress answers a duplicate category with HTTP 400, error code
        ``term_exists`` and the existing ID under ``data.term_id``. The
        409 + list shape the earlier code looked for is still accepted.
        Returns None when the response does not describe an existing term.
        """
        if response.status_code not in (400, 409):
            return None
        try:
            payload = response.json()
        except ValueError:
            return None
        if isinstance(payload, dict):
            if payload.get('code') != 'term_exists':
                return None
            data = payload.get('data')
            term_id = data.get('term_id') if isinstance(data, dict) else None
            return int(term_id) if term_id is not None else None
        if response.status_code == 409 and isinstance(payload, list) and payload:
            return payload[0]['id']
        return None

    def _remember(self, site_name: str, slug: str, category_id: int,
                  name: str) -> None:
        """Record a category in the site's cache if that site is already cached."""
        if site_name in self.category_cache:
            self.category_cache[site_name][slug] = {
                'id': category_id,
                'name': name,
                'slug': slug,
                'count': 0
            }

    def get_site_auth(self, site_name: str) -> Tuple[str, "HTTPBasicAuth"]:
        """
        Get site URL and auth for a given site name.

        Returns:
            (base URL without trailing slash, HTTP basic auth object)

        Raises:
            ValueError: If site_name is not a configured site
        """
        site_config = self.sites.get(site_name)
        if not site_config:
            raise ValueError(f"Site not found: {site_name}")
        base_url = site_config['url'].rstrip('/')
        auth = HTTPBasicAuth(site_config['username'], site_config['password'])
        return base_url, auth

    def fetch_categories(self, site_name: str) -> Dict[str, Dict]:
        """
        Fetch all categories from a WordPress site.

        Results are cached per site; a later call returns the cached mapping
        without touching the network.

        Returns:
            Dict mapping lower-cased category slug to a dict with the keys
            'id', 'name', 'slug' and 'count'. Empty dict on any error
            (errors are not cached).
        """
        if site_name in self.category_cache:
            return self.category_cache[site_name]
        logger.info(f"Fetching categories from {site_name}...")
        try:
            base_url, auth = self.get_site_auth(site_name)
            categories = {}
            page = 1
            while True:
                response = requests.get(
                    f"{base_url}/wp-json/wp/v2/categories",
                    params={'per_page': self.PAGE_SIZE, 'page': page},
                    auth=auth,
                    timeout=self.REQUEST_TIMEOUT
                )
                response.raise_for_status()
                page_categories = response.json()
                if not page_categories:
                    break
                for cat in page_categories:
                    categories[cat['slug'].lower()] = {
                        'id': cat['id'],
                        'name': cat['name'],
                        'slug': cat['slug'],
                        'count': cat.get('count', 0)
                    }
                # A short page is the last page.
                if len(page_categories) < self.PAGE_SIZE:
                    break
                page += 1
            self.category_cache[site_name] = categories
            logger.info(f"✓ Fetched {len(categories)} categories from {site_name}")
            return categories
        except Exception as e:
            logger.error(f"Error fetching categories from {site_name}: {e}")
            return {}

    def create_category(self, site_name: str, category_name: str,
                        description: str = '', parent_id: int = 0) -> Optional[int]:
        """
        Create a new category in WordPress.

        If WordPress reports that the category already exists, the existing
        category's ID is returned instead of failing.

        Args:
            site_name: Site to create category on
            category_name: Name of the category
            description: Category description
            parent_id: Parent category ID (0 for top-level)

        Returns:
            Category ID if created or already present, None otherwise
        """
        try:
            base_url, auth = self.get_site_auth(site_name)
            slug = self._slugify(category_name)
            logger.info(f"Creating category '{category_name}' on {site_name}...")
            response = requests.post(
                f"{base_url}/wp-json/wp/v2/categories",
                json={
                    'name': category_name,
                    'slug': slug,
                    'description': description,
                    'parent': parent_id
                },
                auth=auth,
                timeout=self.REQUEST_TIMEOUT
            )
            if response.status_code == 201:
                category_data = response.json()
                logger.info(f"✓ Created category '{category_name}' (ID: {category_data['id']})")
                self._remember(site_name, slug, category_data['id'], category_data['name'])
                return category_data['id']

            existing_id = self._existing_term_id(response)
            if existing_id is not None:
                logger.info(f"  Category '{category_name}' already exists")
                # Cache it so repeated proposals for the same category do not
                # each trigger another failing create request.
                self._remember(site_name, slug, existing_id, category_name)
                return existing_id

            logger.error(f"Error creating category: {response.status_code} - {response.text}")
            return None
        except Exception as e:
            logger.error(f"Error creating category: {e}")
            return None

    def get_or_create_category(self, site_name: str, category_name: str,
                               description: str = '') -> Optional[int]:
        """
        Get existing category or create it if it doesn't exist.

        Args:
            site_name: Site to work with
            category_name: Name of the category
            description: Category description (used if creating)

        Returns:
            Category ID, or None if the category could not be found or created
        """
        # Fetch categories if not cached
        if site_name not in self.category_cache:
            self.fetch_categories(site_name)
        # A failed fetch leaves no cache entry; fall through to creation then.
        categories = self.category_cache.get(site_name, {})
        match = self._find_cached(categories, category_name)
        if match is not None:
            logger.info(f"✓ Found existing category '{category_name}' (ID: {match['id']})")
            return match['id']
        return self.create_category(site_name, category_name, description)

    def assign_post_to_category(self, site_name: str, post_id: int,
                                category_id: int, append: bool = True) -> bool:
        """
        Assign a post to a category.

        Args:
            site_name: Site where post exists
            post_id: Post ID
            category_id: Category ID to assign
            append: If True, add to existing categories; if False, replace all

        Returns:
            True if the post ends up in the category, False otherwise
        """
        try:
            base_url, auth = self.get_site_auth(site_name)
            post_url = f"{base_url}/wp-json/wp/v2/posts/{post_id}"
            if append:
                # Read the current categories so the update keeps them.
                response = requests.get(
                    post_url,
                    auth=auth,
                    timeout=self.REQUEST_TIMEOUT
                )
                if response.status_code != 200:
                    logger.error(f"Could not fetch post {post_id}")
                    return False
                current_categories = list(response.json().get('categories', []))
                if category_id in current_categories:
                    # Nothing to change; skip the redundant write.
                    logger.info(f"✓ Post {post_id} already in category {category_id}")
                    return True
                target_categories = current_categories + [category_id]
            else:
                target_categories = [category_id]

            response = requests.post(
                post_url,
                json={'categories': target_categories},
                auth=auth,
                timeout=self.REQUEST_TIMEOUT
            )
            if response.status_code == 200:
                logger.info(f"✓ Assigned post {post_id} to category {category_id}")
                return True
            logger.error(f"Error assigning category: {response.status_code}")
            return False
        except Exception as e:
            logger.error(f"Error assigning category: {e}")
            return False

    def bulk_assign_categories(self, site_name: str,
                               post_category_map: Dict[int, List[int]]) -> Dict[str, int]:
        """
        Bulk assign posts to categories.

        Each (post, category) pair is assigned in append mode and counted
        individually.

        Args:
            site_name: Site to work with
            post_category_map: Dict mapping post_id to list of category_ids

        Returns:
            Statistics dict with 'success' and 'failed' counts
        """
        stats = {'success': 0, 'failed': 0}
        logger.info(f"Bulk assigning categories on {site_name}...")
        for post_id, category_ids in post_category_map.items():
            for category_id in category_ids:
                if self.assign_post_to_category(site_name, post_id, category_id):
                    stats['success'] += 1
                else:
                    stats['failed'] += 1
        logger.info(f"✓ Bulk assignment complete: {stats['success']} successful, {stats['failed']} failed")
        return stats
class CategoryAssignmentProcessor:
    """Process AI category proposals and apply them to WordPress.

    Proposals are rows of a CSV with (at least) the columns ``post_id``,
    ``proposed_category`` and ``category_confidence``. Counters in
    ``processing_stats`` accumulate over the lifetime of the instance.
    """

    # Rank of each confidence label; a higher rank means more confident.
    CONFIDENCE_ORDER = {'High': 3, 'Medium': 2, 'Low': 1}
    # Rank used for blank or unrecognised labels (same as 'Medium').
    DEFAULT_CONFIDENCE_RANK = 2

    def __init__(self):
        """Initialize processor."""
        self.category_manager = WordPressCategoryManager()
        self.processing_stats = {
            'total_posts': 0,
            'categories_created': 0,
            'posts_updated': 0,
            'errors': 0
        }

    @classmethod
    def _confidence_rank(cls, label) -> int:
        """Rank a confidence label, ignoring case and surrounding whitespace.

        Blank, missing or unknown labels rank as Medium.
        """
        normalized = str(label or '').strip().capitalize()
        return cls.CONFIDENCE_ORDER.get(normalized, cls.DEFAULT_CONFIDENCE_RANK)

    @staticmethod
    def _parse_post_id(raw) -> int:
        """Parse a CSV cell into a positive post ID, or 0 if it is not one."""
        try:
            post_id = int(str(raw).strip())
        except (TypeError, ValueError):
            return 0
        return post_id if post_id > 0 else 0

    def load_proposals(self, proposals_csv: str) -> List[Dict]:
        """
        Load category proposals from CSV.

        Returns:
            One dict per CSV row keyed by the header row; empty list if the
            file cannot be read
        """
        logger.info(f"Loading proposals from: {proposals_csv}")
        try:
            # newline='' lets the csv module handle line endings itself;
            # utf-8-sig drops a leading BOM so the first header stays clean.
            with open(proposals_csv, 'r', encoding='utf-8-sig', newline='') as f:
                proposals = list(csv.DictReader(f))
            logger.info(f"✓ Loaded {len(proposals)} proposals")
            return proposals
        except Exception as e:
            logger.error(f"Error loading proposals: {e}")
            return []

    def process_proposals(self, proposals: List[Dict], site_name: str,
                          confidence_threshold: str = 'Medium',
                          dry_run: bool = False) -> Dict[str, int]:
        """
        Process AI category proposals and apply to WordPress.

        Rows with an unusable post_id or an empty proposed_category are
        counted as errors and skipped; they do not abort the run.

        Args:
            proposals: List of proposal dicts from CSV
            site_name: Site to apply changes to
            confidence_threshold: Minimum confidence to apply (High, Medium,
                Low; case-insensitive, unknown values are treated as Medium)
            dry_run: If True, only log what would be assigned

        Returns:
            Statistics dict (the instance's processing_stats)
        """
        logger.info("\n" + "="*70)
        logger.info("PROCESSING CATEGORY PROPOSALS")
        logger.info("="*70)
        if dry_run:
            logger.info("DRY RUN - No changes will be made")

        # Filter by confidence
        if str(confidence_threshold or '').strip().capitalize() not in self.CONFIDENCE_ORDER:
            logger.warning(f"Unknown confidence threshold '{confidence_threshold}', using Medium")
        min_confidence = self._confidence_rank(confidence_threshold)
        filtered_proposals = [
            p for p in proposals
            if self._confidence_rank(p.get('category_confidence')) >= min_confidence
        ]
        logger.info(f"Filtered to {len(filtered_proposals)} proposals (confidence >= {confidence_threshold})")

        # Warm the category cache once before the per-post loop.
        self.category_manager.fetch_categories(site_name)

        for i, proposal in enumerate(filtered_proposals, 1):
            logger.info(f"\n[{i}/{len(filtered_proposals)}] Processing post {proposal.get('post_id')}...")
            post_id = self._parse_post_id(proposal.get('post_id'))
            proposed_category = (proposal.get('proposed_category') or '').strip()
            confidence = proposal.get('category_confidence') or 'Medium'
            if not post_id or not proposed_category:
                logger.warning("  Skipping: Missing post_id or proposed_category")
                self.processing_stats['errors'] += 1
                continue
            if dry_run:
                logger.info(f"  Would assign to: {proposed_category}")
                continue

            # Get or create the category
            category_id = self.category_manager.get_or_create_category(
                site_name,
                proposed_category,
                description=f"AI-proposed category (confidence: {confidence})"
            )
            if not category_id:
                self.processing_stats['errors'] += 1
                logger.error(f"  Failed to get/create category '{proposed_category}'")
                continue

            # Counts categories that were found as well as newly created ones.
            self.processing_stats['categories_created'] += 1
            if self.category_manager.assign_post_to_category(
                site_name, post_id, category_id, append=True
            ):
                self.processing_stats['posts_updated'] += 1
                logger.info(f"  ✓ Assigned to '{proposed_category}'")
            else:
                self.processing_stats['errors'] += 1

        self.processing_stats['total_posts'] = len(filtered_proposals)

        # Print summary
        logger.info("\n" + "="*70)
        logger.info("PROCESSING SUMMARY")
        logger.info("="*70)
        logger.info(f"Total proposals processed: {self.processing_stats['total_posts']}")
        logger.info(f"Categories created/found: {self.processing_stats['categories_created']}")
        logger.info(f"Posts updated: {self.processing_stats['posts_updated']}")
        logger.info(f"Errors: {self.processing_stats['errors']}")
        return self.processing_stats

    def run(self, proposals_csv: str, site_name: str,
            confidence_threshold: str = 'Medium',
            dry_run: bool = False) -> Dict[str, int]:
        """
        Run complete category assignment process.

        Args:
            proposals_csv: Path to proposals CSV
            site_name: Site to apply changes to
            confidence_threshold: Minimum confidence to apply
            dry_run: If True, preview changes without applying

        Returns:
            Statistics dict (unchanged if no proposals could be loaded)
        """
        proposals = self.load_proposals(proposals_csv)
        if not proposals:
            logger.error("No proposals to process")
            return self.processing_stats
        return self.process_proposals(
            proposals,
            site_name,
            confidence_threshold,
            dry_run
        )