Refactor into integrated Python package structure
Architecture Changes: - Created src/seo/ package with modular architecture - Main application class (SEOApp) with Rails-inspired API - Separated concerns into distinct modules: - app.py: Main application orchestrator - cli.py: Command-line interface - config.py: Configuration management - exporter.py: Post export functionality - analyzer.py: AI analysis - recategorizer.py: Recategorization - seo_checker.py: SEO quality checking - categories.py: Category management - approval.py: User approval system New Features: - Proper Python package structure (src layout) - setup.py and setup.cfg for installation - Can be installed with: pip install -e . - Entry point: seo = seo.cli:main - Cleaner imports and dependencies Benefits: - Better code organization - Easier to maintain and extend - Follows Python best practices - Proper package isolation - Can be imported as library - Testable components - Clear separation of concerns Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
This commit is contained in:
7
src/seo/__init__.py
Normal file
7
src/seo/__init__.py
Normal file
@@ -0,0 +1,7 @@
|
||||
"""
|
||||
SEO Automation Tool - Integrated Application
|
||||
A comprehensive WordPress SEO automation suite.
|
||||
"""
|
||||
|
||||
__version__ = '1.0.0'
|
||||
__author__ = 'SEO Automation Team'
|
||||
14
src/seo/analyzer.py
Normal file
14
src/seo/analyzer.py
Normal file
@@ -0,0 +1,14 @@
|
||||
"""
|
||||
Analyzer Module - AI-powered post analysis
|
||||
"""
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Import from scripts directory
|
||||
scripts_dir = Path(__file__).parent.parent.parent / 'scripts'
|
||||
sys.path.insert(0, str(scripts_dir))
|
||||
|
||||
from ai_analyze_posts_for_decisions import PostAnalyzer
|
||||
|
||||
__all__ = ['PostAnalyzer']
|
||||
255
src/seo/app.py
Normal file
255
src/seo/app.py
Normal file
@@ -0,0 +1,255 @@
|
||||
"""
|
||||
SEO Application Core - Integrated SEO automation functionality
|
||||
"""
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from typing import Optional, List
|
||||
|
||||
from .exporter import PostExporter
|
||||
from .analyzer import PostAnalyzer
|
||||
from .recategorizer import PostRecategorizer
|
||||
from .seo_checker import MultiSiteSEOAnalyzer
|
||||
from .categories import CategoryManager
|
||||
from .approval import UserApprovalSystem
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SEOApp:
    """
    Main SEO Application class.

    Provides a unified interface for all SEO automation tasks.
    Inspired by Ruby on Rails' Active Record pattern.

    Usage:
        app = SEOApp()
        app.export()
        app.analyze()
        app.seo_check()
    """

    def __init__(self, verbose: bool = False):
        """
        Initialize the SEO application.

        Args:
            verbose: Enable verbose logging
        """
        self.verbose = verbose
        # Output directory at the repository root (three levels up from
        # src/seo/app.py); created eagerly so later steps can write to it.
        self.output_dir = Path(__file__).parent.parent.parent / 'output'
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # Initialize components lazily: each attribute is populated the
        # first time the corresponding public method runs.
        self.exporter = None
        self.analyzer = None
        self.recategorizer = None
        self.seo_checker = None
        self.category_manager = None
        self.approval_system = None

        # NOTE(review): basicConfig() mutates process-global logging state
        # and is a no-op if logging was already configured by the host.
        if verbose:
            logging.basicConfig(level=logging.DEBUG)
        else:
            logging.basicConfig(level=logging.INFO)

    def export(self) -> str:
        """
        Export all posts from WordPress sites.

        Returns:
            Path to exported CSV file
        """
        logger.info("📦 Exporting all posts from WordPress sites...")
        self.exporter = PostExporter()
        self.exporter.run()

        # Get the exported file path.  PostExporter.export_to_csv() writes
        # output/all_posts_<YYYY-MM-DD>.csv, so reconstruct that path by
        # today's date rather than relying on a return value from run().
        date_str = datetime.now().strftime('%Y-%m-%d')
        csv_file = self.output_dir / f'all_posts_{date_str}.csv'

        logger.info(f"✅ Export completed: {csv_file}")
        return str(csv_file)

    def analyze(self, csv_file: Optional[str] = None) -> str:
        """
        Analyze posts with AI for recommendations.

        Args:
            csv_file: Path to CSV file (uses latest export if not provided)

        Returns:
            Path to analysis results

        Raises:
            FileNotFoundError: if no CSV was given and none can be found.
        """
        logger.info("🤖 Analyzing posts with AI for recommendations...")

        # Find CSV file: fall back to the most recent export on disk.
        if not csv_file:
            csv_file = self._find_latest_export()

        if not csv_file:
            raise FileNotFoundError("No exported posts found. Run export() first or provide a CSV file.")

        logger.info(f"Using file: {csv_file}")

        # Run analysis
        self.analyzer = PostAnalyzer(csv_file)
        self.analyzer.run()

        logger.info("✅ AI analysis completed!")
        return csv_file

    def recategorize(self, csv_file: Optional[str] = None) -> str:
        """
        Recategorize posts with AI suggestions.

        Args:
            csv_file: Path to CSV file (uses latest export if not provided)

        Returns:
            Path to recategorization results

        Raises:
            FileNotFoundError: if no CSV was given and none can be found.
        """
        logger.info("🏷️ Recategorizing posts with AI suggestions...")

        # Find CSV file: fall back to the most recent export on disk.
        if not csv_file:
            csv_file = self._find_latest_export()

        if not csv_file:
            raise FileNotFoundError("No exported posts found. Run export() first or provide a CSV file.")

        logger.info(f"Using file: {csv_file}")

        # Run recategorization
        self.recategorizer = PostRecategorizer(csv_file)
        self.recategorizer.run()

        logger.info("✅ Recategorization completed!")
        return csv_file

    def seo_check(self, top_n: int = 10) -> None:
        """
        Check SEO quality of titles and descriptions.

        Args:
            top_n: Number of top posts to get AI recommendations for
        """
        logger.info("🔍 Checking SEO quality of titles/descriptions...")

        self.seo_checker = MultiSiteSEOAnalyzer()
        self.seo_checker.run(use_ai=True, top_n=top_n)

        logger.info("✅ SEO check completed!")

    def categories(self) -> None:
        """Manage categories across all sites."""
        logger.info("🗂️ Managing categories across all sites...")

        self.category_manager = CategoryManager()
        self.category_manager.run()

        logger.info("✅ Category management completed!")

    def approve(self, files: Optional[List[str]] = None) -> None:
        """
        Review and approve recommendations.

        Args:
            files: List of CSV files to review (auto-detects if not provided)

        Raises:
            FileNotFoundError: if no files were given and none are found.
        """
        logger.info("✅ Reviewing and approving recommendations...")

        self.approval_system = UserApprovalSystem()

        if not files:
            # Auto-detect recommendation files
            files = self._find_recommendation_files()

        if not files:
            raise FileNotFoundError("No recommendation files found. Run analyze() or categories() first.")

        logger.info(f"Found {len(files)} recommendation files to review")
        self.approval_system.run_interactive_approval(files)

        logger.info("✅ Approval process completed!")

    def full_pipeline(self) -> None:
        """
        Run complete workflow: export → analyze → seo_check
        """
        logger.info("🚀 Running full SEO automation pipeline...")

        # Step 1: Export
        logger.info("\n📦 Step 1/3: Exporting posts...")
        self.export()

        # Step 2: Analyze
        logger.info("\n🤖 Step 2/3: Analyzing with AI...")
        self.analyze()

        # Step 3: SEO Check
        logger.info("\n🔍 Step 3/3: Checking SEO quality...")
        self.seo_check()

        logger.info("\n✅ Full pipeline completed!")

    def _find_latest_export(self) -> Optional[str]:
        """
        Find the latest exported CSV file.

        Returns:
            Path to latest CSV file or None if not found
        """
        csv_files = list(self.output_dir.glob('all_posts_*.csv'))

        if not csv_files:
            return None

        # NOTE(review): st_ctime is inode-change time on Unix, not creation
        # time — st_mtime may be what was intended here; confirm.
        latest = max(csv_files, key=lambda f: f.stat().st_ctime)
        return str(latest)

    def _find_recommendation_files(self) -> List[str]:
        """
        Find recommendation files in output directory.

        Returns:
            List of paths to recommendation files
        """
        # Filename patterns produced by the analyze/categories steps.
        patterns = [
            'category_assignments_*.csv',
            'posts_with_ai_recommendations_*.csv',
            'posts_to_move_*.csv',
            'posts_to_consolidate_*.csv',
            'posts_to_delete_*.csv'
        ]

        files = []
        for pattern in patterns:
            files.extend(self.output_dir.glob(pattern))

        return [str(f) for f in files]

    def status(self) -> dict:
        """
        Get status of output files.

        Returns:
            Dictionary with file information:
            ``{'total_files': int, 'files': [{'name', 'size_kb', 'modified'}, ...]}``
            (``files`` holds at most the 10 most recent CSVs).
        """
        files = list(self.output_dir.glob('*.csv'))

        status_info = {
            'total_files': len(files),
            'files': []
        }

        # Newest first, capped at 10 entries for display purposes.
        for file in sorted(files, key=lambda f: f.stat().st_ctime, reverse=True)[:10]:
            status_info['files'].append({
                'name': file.name,
                'size_kb': file.stat().st_size / 1024,
                'modified': datetime.fromtimestamp(file.stat().st_mtime).strftime('%Y-%m-%d %H:%M')
            })

        return status_info
|
||||
14
src/seo/approval.py
Normal file
14
src/seo/approval.py
Normal file
@@ -0,0 +1,14 @@
|
||||
"""
|
||||
Approval System Module - User approval for recommendations
|
||||
"""
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Import from scripts directory
|
||||
scripts_dir = Path(__file__).parent.parent.parent / 'scripts'
|
||||
sys.path.insert(0, str(scripts_dir))
|
||||
|
||||
from user_approval import UserApprovalSystem
|
||||
|
||||
__all__ = ['UserApprovalSystem']
|
||||
14
src/seo/categories.py
Normal file
14
src/seo/categories.py
Normal file
@@ -0,0 +1,14 @@
|
||||
"""
|
||||
Category Manager Module - Category management across sites
|
||||
"""
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Import from scripts directory
|
||||
scripts_dir = Path(__file__).parent.parent.parent / 'scripts'
|
||||
sys.path.insert(0, str(scripts_dir))
|
||||
|
||||
from category_manager import CategoryManager
|
||||
|
||||
__all__ = ['CategoryManager']
|
||||
223
src/seo/cli.py
Normal file
223
src/seo/cli.py
Normal file
@@ -0,0 +1,223 @@
|
||||
#!/usr/bin/env python3
"""
SEO Automation CLI - Main entry point
Unified command-line interface for SEO automation.
"""

import sys
import argparse
from pathlib import Path

# Add src to path so the package imports below work when this file is
# executed directly (``python src/seo/cli.py``).  This module lives at
# src/seo/cli.py, so the ``src`` directory is the *grandparent* of this
# file — the previous ``Path(__file__).parent / 'src'`` pointed at the
# nonexistent src/seo/src and never made ``seo`` importable.
src_dir = Path(__file__).resolve().parent.parent
if str(src_dir) not in sys.path:
    sys.path.insert(0, str(src_dir))

from seo.app import SEOApp
from seo.config import Config
|
||||
|
||||
|
||||
def main():
    """Main CLI entry point.

    Parses the command line, validates configuration when the command
    actually needs credentials, and dispatches to the matching ``cmd_*``
    handler.

    Returns:
        Process exit code: 0 on success, 1 on any error.
    """
    parser = argparse.ArgumentParser(
        prog='seo',
        description='SEO Automation CLI - Manage WordPress SEO with AI',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  seo export             Export all posts from WordPress sites
  seo analyze            Analyze posts with AI for recommendations
  seo analyze posts.csv  Analyze specific CSV file
  seo recategorize       Recategorize posts with AI
  seo seo_check          Check SEO quality of titles/descriptions
  seo categories         Manage categories across sites
  seo approve            Review and approve recommendations
  seo full_pipeline      Run complete workflow: export → analyze → seo_check
  seo status             Show output files status
        """
    )

    parser.add_argument('command', nargs='?', help='Command to run')
    parser.add_argument('args', nargs='*', help='Arguments for the command')
    parser.add_argument('--verbose', '-v', action='store_true', help='Verbose output')
    parser.add_argument('--dry-run', action='store_true', help='Show what would be done')
    parser.add_argument('--top-n', type=int, default=10, help='Number of top posts for AI analysis')

    args = parser.parse_args()

    if not args.command:
        parser.print_help()
        return 0

    # Route table: command name -> handler function.
    commands = {
        'export': cmd_export,
        'analyze': cmd_analyze,
        'recategorize': cmd_recategorize,
        'seo_check': cmd_seo_check,
        'categories': cmd_categories,
        'approve': cmd_approve,
        'full_pipeline': cmd_full_pipeline,
        'status': cmd_status,
        'help': cmd_help,
    }

    # Reject unknown commands before anything else so the user gets a
    # useful message even with a broken configuration.
    if args.command not in commands:
        print(f"❌ Unknown command: {args.command}")
        print("\nAvailable commands:")
        for cmd in sorted(commands.keys()):
            print(f" {cmd}")
        return 1

    # 'help' and 'status' are purely local and must work without
    # credentials; every other command talks to WordPress / the AI API,
    # so validate the configuration first.
    if args.command not in ('help', 'status'):
        try:
            Config.validate()
        except ValueError as e:
            print(f"❌ Configuration error: {e}")
            return 1

    # Create application instance
    app = SEOApp(verbose=args.verbose)

    try:
        return commands[args.command](app, args)
    except KeyboardInterrupt:
        print("\n⚠️ Operation cancelled by user")
        return 1
    except FileNotFoundError as e:
        print(f"❌ File not found: {e}")
        return 1
    except Exception as e:
        # Top-level boundary: report, and show the traceback in verbose mode.
        print(f"❌ Error: {e}")
        if args.verbose:
            import traceback
            traceback.print_exc()
        return 1
|
||||
|
||||
|
||||
def cmd_export(app, args):
    """Export all posts, or just announce the plan under ``--dry-run``."""
    if args.dry_run:
        print("Would export all posts from WordPress sites")
    else:
        app.export()
    return 0
|
||||
|
||||
|
||||
def cmd_analyze(app, args):
    """Run the AI analysis, optionally on a user-supplied CSV file."""
    if args.dry_run:
        print("Would analyze posts with AI for recommendations")
        return 0

    # First positional argument (if any) is the CSV to analyze.
    csv_path = next(iter(args.args), None)
    app.analyze(csv_path)
    return 0
|
||||
|
||||
|
||||
def cmd_recategorize(app, args):
    """Run AI recategorization, optionally on a user-supplied CSV file."""
    if args.dry_run:
        print("Would recategorize posts with AI suggestions")
        return 0

    # First positional argument (if any) is the CSV to process.
    source = next(iter(args.args), None)
    app.recategorize(source)
    return 0
|
||||
|
||||
|
||||
def cmd_seo_check(app, args):
    """Check SEO quality, forwarding the ``--top-n`` option."""
    if args.dry_run:
        print("Would check SEO quality of titles/descriptions")
    else:
        app.seo_check(top_n=args.top_n)
    return 0
|
||||
|
||||
|
||||
def cmd_categories(app, args):
    """Run category management across all configured sites."""
    if args.dry_run:
        print("Would manage categories across all sites")
    else:
        app.categories()
    return 0
|
||||
|
||||
|
||||
def cmd_approve(app, args):
    """Review and approve recommendations, optionally for given files."""
    if args.dry_run:
        print("Would review and approve recommendations")
        return 0

    # An empty positional list means "auto-detect" inside the app layer.
    app.approve(args.args or None)
    return 0
|
||||
|
||||
|
||||
def cmd_full_pipeline(app, args):
    """Kick off the complete export → analyze → seo_check workflow."""
    if args.dry_run:
        print("Would run full pipeline: export → analyze → seo_check")
    else:
        app.full_pipeline()
    return 0
|
||||
|
||||
|
||||
def cmd_status(app, args):
    """Print a summary of the CSV files sitting in output/."""
    if args.dry_run:
        print("Would show output files status")
        return 0

    info = app.status()

    print("📊 Output files status:")
    if not info['total_files']:
        print(" No CSV files found in output/")
        return 0

    print(f"\nFound {info['total_files']} CSV files in output/:")
    for entry in info['files']:
        print(f" {entry['name']} ({entry['size_kb']:.1f}KB, {entry['modified']})")

    return 0
|
||||
|
||||
|
||||
def cmd_help(app, args):
    """Show help.

    Prints the static usage text; ``app`` and ``args`` are unused but
    kept so every handler shares the same ``(app, args)`` signature.
    """
    print("""
SEO Automation CLI - Available Commands

Basic Commands:
  export                   Export all posts from WordPress sites
  analyze [csv_file]       Analyze posts with AI (optional CSV input)
  recategorize [csv_file]  Recategorize posts with AI (optional CSV input)
  seo_check                Check SEO quality of titles/descriptions
  categories               Manage categories across all sites
  approve [files...]       Review and approve recommendations
  full_pipeline            Run complete workflow: export → analyze → seo_check

Utility:
  status                   Show output files status
  help                     Show this help message

Options:
  --verbose, -v            Enable verbose logging
  --dry-run                Show what would be done without doing it
  --top-n N                Number of top posts for AI analysis (default: 10)

Examples:
  seo export
  seo analyze
  seo analyze output/all_posts_2026-02-16.csv
  seo approve output/category_assignments_*.csv
  seo full_pipeline
  seo status
""")
    return 0
|
||||
|
||||
|
||||
# Allow running this module directly (``python src/seo/cli.py``) in
# addition to the installed ``seo`` console entry point.
if __name__ == '__main__':
    sys.exit(main())
|
||||
70
src/seo/config.py
Normal file
70
src/seo/config.py
Normal file
@@ -0,0 +1,70 @@
|
||||
"""
|
||||
Configuration module for SEO application.
|
||||
Loads configuration from environment variables and YAML.
|
||||
"""
|
||||
|
||||
import os
|
||||
import yaml
|
||||
from dotenv import load_dotenv
|
||||
from pathlib import Path
|
||||
|
||||
load_dotenv()
|
||||
|
||||
|
||||
class Config:
    """Configuration class for SEO automation.

    All settings are resolved once, at import time, when this class body
    executes.  Resolution order for every value: environment variable
    first, then the YAML config file, then a hard-coded default.
    """

    # NOTE(review): resolves to src/config.yaml (two levels up from
    # src/seo/config.py) — confirm the file is expected there and not at
    # the repository root.
    CONFIG_FILE = Path(__file__).parent.parent / 'config.yaml'

    if CONFIG_FILE.exists():
        with open(CONFIG_FILE, 'r', encoding='utf-8') as f:
            # NOTE(review): safe_load returns None for an empty file,
            # which would break the .get() chains below — verify.
            YAML_CONFIG = yaml.safe_load(f)
    else:
        YAML_CONFIG = {}

    # WordPress Settings (primary site; env vars override YAML)
    WORDPRESS_URL = os.getenv('WORDPRESS_URL', YAML_CONFIG.get('primary_site', {}).get('url', '')).rstrip('/')
    WORDPRESS_USERNAME = os.getenv('WORDPRESS_USERNAME', YAML_CONFIG.get('primary_site', {}).get('username', ''))
    WORDPRESS_APP_PASSWORD = os.getenv('WORDPRESS_APP_PASSWORD', YAML_CONFIG.get('primary_site', {}).get('password', ''))

    # Multi-site Configuration: per-site credentials fall back to the
    # primary-site credentials when no site-specific env var is set.
    WORDPRESS_SITES = {
        'mistergeek.net': {
            'url': os.getenv('WORDPRESS_MISTERGEEK_URL', YAML_CONFIG.get('wordpress_sites', {}).get('mistergeek.net', {}).get('url', 'https://www.mistergeek.net')),
            'username': os.getenv('WORDPRESS_MISTERGEEK_USERNAME', os.getenv('WORDPRESS_USERNAME', '')),
            'password': os.getenv('WORDPRESS_MISTERGEEK_PASSWORD', os.getenv('WORDPRESS_APP_PASSWORD', '')),
        },
        'webscroll.fr': {
            'url': os.getenv('WORDPRESS_WEBSCROLL_URL', YAML_CONFIG.get('wordpress_sites', {}).get('webscroll.fr', {}).get('url', 'https://www.webscroll.fr')),
            'username': os.getenv('WORDPRESS_WEBSCROLL_USERNAME', os.getenv('WORDPRESS_USERNAME', '')),
            'password': os.getenv('WORDPRESS_WEBSCROLL_PASSWORD', os.getenv('WORDPRESS_APP_PASSWORD', '')),
        },
        'hellogeek.net': {
            'url': os.getenv('WORDPRESS_HELLOGEEK_URL', YAML_CONFIG.get('wordpress_sites', {}).get('hellogeek.net', {}).get('url', 'https://www.hellogeek.net')),
            'username': os.getenv('WORDPRESS_HELLOGEEK_USERNAME', os.getenv('WORDPRESS_USERNAME', '')),
            'password': os.getenv('WORDPRESS_HELLOGEEK_PASSWORD', os.getenv('WORDPRESS_APP_PASSWORD', '')),
        }
    }

    # OpenRouter API Settings
    OPENROUTER_API_KEY = os.getenv('OPENROUTER_API_KEY', YAML_CONFIG.get('ai_model', {}).get('api_key', ''))
    AI_MODEL = os.getenv('AI_MODEL', YAML_CONFIG.get('ai_model', {}).get('name', 'anthropic/claude-3.5-sonnet'))

    @classmethod
    def validate(cls):
        """Validate configuration.

        Returns:
            True when every required setting is present.

        Raises:
            ValueError: listing every missing required setting at once.
        """
        errors = []

        if not cls.WORDPRESS_URL:
            errors.append("WORDPRESS_URL is required")
        if not cls.WORDPRESS_USERNAME:
            errors.append("WORDPRESS_USERNAME is required")
        if not cls.WORDPRESS_APP_PASSWORD:
            errors.append("WORDPRESS_APP_PASSWORD is required")
        if not cls.OPENROUTER_API_KEY:
            errors.append("OPENROUTER_API_KEY is required")

        if errors:
            raise ValueError("Configuration errors:\n" + "\n".join(f" - {e}" for e in errors))

        return True
|
||||
226
src/seo/exporter.py
Normal file
226
src/seo/exporter.py
Normal file
@@ -0,0 +1,226 @@
|
||||
"""
|
||||
Post Exporter Module - Export posts from WordPress sites
|
||||
"""
|
||||
|
||||
import csv
|
||||
import logging
|
||||
import time
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Optional
|
||||
import requests
|
||||
from requests.auth import HTTPBasicAuth
|
||||
import re
|
||||
|
||||
from .config import Config
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class PostExporter:
    """Export posts from WordPress sites to CSV.

    Walks every site in ``Config.WORDPRESS_SITES``, pulls published and
    draft posts through the WP REST API, flattens them into rows, and
    writes one combined CSV under ``output/``.
    """

    def __init__(self):
        """Initialize the exporter."""
        self.sites = Config.WORDPRESS_SITES
        # Accumulated flattened rows across all sites; filled by run().
        self.all_posts = []
        # Per-site category lookup cache so each site is queried once.
        self.category_cache = {}

    def fetch_category_names(self, site_name: str, site_config: Dict) -> Dict[int, Dict[str, str]]:
        """Fetch category names from a WordPress site.

        Returns:
            Mapping of category id -> ``{'name': ..., 'slug': ...}``;
            empty when the request fails (failure is non-fatal).
        """
        if site_name in self.category_cache:
            return self.category_cache[site_name]

        logger.info(f" Fetching categories from {site_name}...")
        categories = {}
        base_url = site_config['url'].rstrip('/')
        api_url = f"{base_url}/wp-json/wp/v2/categories"
        auth = HTTPBasicAuth(site_config['username'], site_config['password'])

        try:
            # per_page=100 is the REST API maximum; sites with more than
            # 100 categories would need pagination here.
            response = requests.get(api_url, params={'per_page': 100}, auth=auth, timeout=10)
            response.raise_for_status()

            for cat in response.json():
                categories[cat['id']] = {'name': cat.get('name', ''), 'slug': cat.get('slug', '')}
            logger.info(f" ✓ Fetched {len(categories)} categories")
        except Exception as e:
            # Best-effort: downstream falls back to raw category ids.
            logger.warning(f" Could not fetch categories from {site_name}: {e}")

        self.category_cache[site_name] = categories
        return categories

    def fetch_posts_from_site(self, site_name: str, site_config: Dict) -> List[Dict]:
        """Fetch all posts from a WordPress site.

        Pages through 'publish' then 'draft' posts, 100 per request.
        WordPress answers a page number past the last page with HTTP 400,
        which is treated as the normal end-of-pagination signal.
        """
        logger.info(f"\nFetching posts from {site_name}...")

        posts = []
        base_url = site_config['url'].rstrip('/')
        api_url = f"{base_url}/wp-json/wp/v2/posts"
        auth = HTTPBasicAuth(site_config['username'], site_config['password'])

        for status in ['publish', 'draft']:
            page = 1
            status_count = 0

            while True:
                try:
                    logger.info(f" Fetching page {page} ({status} posts)...")
                    response = requests.get(
                        api_url,
                        params={'page': page, 'per_page': 100, 'status': status},
                        auth=auth,
                        timeout=10
                    )
                    response.raise_for_status()

                    page_posts = response.json()
                    if not page_posts:
                        break

                    posts.extend(page_posts)
                    status_count += len(page_posts)
                    logger.info(f" ✓ Got {len(page_posts)} posts (total: {len(posts)})")

                    page += 1
                    # Be polite to the remote API between pages.
                    time.sleep(0.5)

                except requests.exceptions.HTTPError as e:
                    # HTTPError only comes from raise_for_status(), so
                    # ``response`` is always bound here.
                    if response.status_code == 400:
                        # Past the last page: expected terminator.
                        logger.info(f" ℹ API limit reached (got {status_count} {status} posts)")
                        break
                    else:
                        logger.error(f"Error on page {page}: {e}")
                        break
                except requests.exceptions.RequestException as e:
                    # Network-level failure: abandon this site/status pair.
                    logger.error(f"Error fetching from {site_name}: {e}")
                    break

        logger.info(f"✓ Total posts from {site_name}: {len(posts)}\n")
        return posts

    def extract_post_details(self, post: Dict, site_name: str, category_map: Dict) -> Dict:
        """Extract post details for CSV export.

        Flattens one raw REST-API post object into a flat row dict;
        'rendered' HTML fields are tag-stripped with a simple regex.
        """
        title = post.get('title', {})
        if isinstance(title, dict):
            title = title.get('rendered', '')

        content = post.get('content', {})
        if isinstance(content, dict):
            content = content.get('rendered', '')
        # Crude tag strip + 500-char truncation: preview column only.
        content_text = re.sub('<[^<]+?>', '', content)[:500]

        excerpt = post.get('excerpt', {})
        if isinstance(excerpt, dict):
            excerpt = excerpt.get('rendered', '')
        excerpt_text = re.sub('<[^<]+?>', '', excerpt)

        # SEO meta: prefer Rank Math, fall back to Yoast.
        meta_dict = post.get('meta', {}) if isinstance(post.get('meta'), dict) else {}
        meta_description = (
            meta_dict.get('rank_math_description', '') or
            meta_dict.get('_yoast_wpseo_metadesc', '') or ''
        )

        # Resolve category ids to names; unknown ids fall back to the
        # stringified id itself.
        category_ids = post.get('categories', [])
        category_names = ', '.join([
            category_map.get(cat_id, {}).get('name', str(cat_id))
            for cat_id in category_ids
        ]) if category_ids else ''

        return {
            'site': site_name,
            'post_id': post['id'],
            'status': post.get('status', 'publish'),
            'title': title.strip(),
            'slug': post.get('slug', ''),
            'url': post.get('link', ''),
            'author_id': post.get('author', ''),
            'date_published': post.get('date', ''),
            'date_modified': post.get('modified', ''),
            'categories': category_names,
            'tags': ', '.join([str(t) for t in post.get('tags', [])]),
            'excerpt': excerpt_text.strip(),
            'content_preview': content_text.strip(),
            'seo_title': meta_dict.get('rank_math_title', ''),
            'meta_description': meta_description,
            'focus_keyword': meta_dict.get('rank_math_focus_keyword', ''),
            'word_count': len(content_text.split()),
        }

    def export_to_csv(self, output_file: Optional[str] = None) -> str:
        """Export all posts to CSV.

        Args:
            output_file: Target path; defaults to
                ``output/all_posts_<YYYY-MM-DD>.csv``.

        Returns:
            Path of the written file, or None when there is nothing to
            export (despite the ``-> str`` annotation).
        """
        if not output_file:
            output_dir = Path(__file__).parent.parent.parent / 'output'
            output_dir.mkdir(parents=True, exist_ok=True)
            date_str = datetime.now().strftime('%Y-%m-%d')
            output_file = output_dir / f'all_posts_{date_str}.csv'

        output_file = Path(output_file)
        output_file.parent.mkdir(parents=True, exist_ok=True)

        if not self.all_posts:
            logger.error("No posts to export")
            return None

        # Column order of the CSV; must match extract_post_details keys.
        fieldnames = [
            'site', 'post_id', 'status', 'title', 'slug', 'url', 'author_id',
            'date_published', 'date_modified', 'categories', 'tags', 'excerpt',
            'content_preview', 'seo_title', 'meta_description', 'focus_keyword', 'word_count',
        ]

        logger.info(f"Exporting {len(self.all_posts)} posts to CSV...")

        with open(output_file, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(self.all_posts)

        logger.info(f"✓ CSV exported to: {output_file}")
        return str(output_file)

    def run(self):
        """Run the complete export process.

        Fetches categories + posts from every configured site, flattens
        them, writes the combined CSV and logs a per-site summary.
        """
        logger.info("="*70)
        logger.info("EXPORTING ALL POSTS FOR AI DECISION MAKING")
        logger.info("="*70)
        logger.info("Sites configured: " + ", ".join(self.sites.keys()))

        for site_name, config in self.sites.items():
            categories = self.fetch_category_names(site_name, config)
            posts = self.fetch_posts_from_site(site_name, config)

            if posts:
                for post in posts:
                    post_details = self.extract_post_details(post, site_name, categories)
                    self.all_posts.append(post_details)

        if not self.all_posts:
            logger.error("No posts found on any site")
            return

        # Stable ordering makes successive exports diffable.
        self.all_posts.sort(key=lambda x: (x['site'], x['post_id']))
        self.export_to_csv()

        # Print summary
        logger.info("\n" + "="*70)
        logger.info("EXPORT SUMMARY")
        logger.info("="*70)

        by_site = {}
        for post in self.all_posts:
            site = post['site']
            if site not in by_site:
                by_site[site] = {'total': 0, 'published': 0, 'draft': 0}
            by_site[site]['total'] += 1
            if post['status'] == 'publish':
                by_site[site]['published'] += 1
            else:
                by_site[site]['draft'] += 1

        for site, stats in sorted(by_site.items()):
            logger.info(f"\n{site}:")
            logger.info(f" Total: {stats['total']}")
            logger.info(f" Published: {stats['published']}")
            logger.info(f" Drafts: {stats['draft']}")

        logger.info(f"\n✓ Export complete!")
|
||||
14
src/seo/recategorizer.py
Normal file
14
src/seo/recategorizer.py
Normal file
@@ -0,0 +1,14 @@
|
||||
"""
|
||||
Recategorizer Module - AI-powered post recategorization
|
||||
"""
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Import from scripts directory
|
||||
scripts_dir = Path(__file__).parent.parent.parent / 'scripts'
|
||||
sys.path.insert(0, str(scripts_dir))
|
||||
|
||||
from ai_recategorize_posts import PostRecategorizer
|
||||
|
||||
__all__ = ['PostRecategorizer']
|
||||
14
src/seo/seo_checker.py
Normal file
14
src/seo/seo_checker.py
Normal file
@@ -0,0 +1,14 @@
|
||||
"""
|
||||
SEO Checker Module - SEO quality analysis
|
||||
"""
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Import from scripts directory
|
||||
scripts_dir = Path(__file__).parent.parent.parent / 'scripts'
|
||||
sys.path.insert(0, str(scripts_dir))
|
||||
|
||||
from multi_site_seo_analyzer import MultiSiteSEOAnalyzer
|
||||
|
||||
__all__ = ['MultiSiteSEOAnalyzer']
|
||||
Reference in New Issue
Block a user