Refactor into integrated Python package structure

Architecture Changes:
- Created src/seo/ package with modular architecture
- Main application class (SEOApp) with Rails-inspired API
- Separated concerns into distinct modules:
  - app.py: Main application orchestrator
  - cli.py: Command-line interface
  - config.py: Configuration management
  - exporter.py: Post export functionality
  - analyzer.py: AI analysis
  - recategorizer.py: Recategorization
  - seo_checker.py: SEO quality checking
  - categories.py: Category management
  - approval.py: User approval system

New Features:
- Proper Python package structure (src layout)
- setup.py and setup.cfg for installation
- Can be installed with: pip install -e .
- Entry point: seo = seo.cli:main
- Cleaner imports and dependencies

Benefits:
- Better code organization
- Easier to maintain and extend
- Follows Python best practices
- Proper package isolation
- Can be imported as library
- Testable components
- Clear separation of concerns

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
This commit is contained in:
Kevin Bataille
2026-02-16 14:37:11 +01:00
parent 8c7cd24685
commit d1b8e2c292
14 changed files with 977 additions and 493 deletions

View File

@@ -193,23 +193,39 @@ The SEO automation tool provides a simple, intuitive CLI inspired by Ruby on Rai
```
seo/
├── seo # Main CLI application (executable)
├── scripts/
│ ├── config.py # Configuration loader
│ ├── export_posts_for_ai_decision.py
│ ├── ai_analyze_posts_for_decisions.py
│ ├── multi_site_seo_analyzer.py
│ ├── category_manager.py # Category management
│ ├── user_approval.py # Approval system
│ └── ...
├── seo # Main CLI executable
├── src/seo/ # Integrated application package
│ ├── __init__.py # Package initialization
│ ├── cli.py # Command-line interface
│ ├── app.py # Main application class
│ ├── config.py # Configuration management
│ ├── exporter.py # Post export functionality
│ ├── analyzer.py # AI analysis functionality
│ ├── recategorizer.py # Recategorization functionality
│ ├── seo_checker.py # SEO quality checking
│ ├── categories.py # Category management
│ └── approval.py # User approval system
├── scripts/ # Legacy scripts (deprecated)
├── config.yaml # YAML configuration
├── .env # Environment variables
├── .env.example # Template
├── requirements.txt # Dependencies
├── output/reports/ # Generated CSV files
├── output/ # Generated files
└── README.md
```
## 📦 Installation
For development/installation:
```bash
# Install in development mode
pip install -e .
# Or just use the executable directly
./seo help
```
## 🎯 Typical Workflow
1. **Export posts** from all sites:

489
seo
View File

@@ -1,493 +1,18 @@
#!/usr/bin/env python3
"""
SEO Automation CLI - Inspired by Ruby on Rails CLI
Simple, intuitive commands for managing WordPress SEO
SEO Automation CLI - Main executable
Entry point for the SEO automation tool.
"""
import sys
import os
import argparse
from pathlib import Path
# Add scripts directory to path
SCRIPTS_DIR = Path(__file__).parent / 'scripts'
sys.path.insert(0, str(SCRIPTS_DIR))
from config import Config
from export_posts_for_ai_decision import PostExporter
from ai_analyze_posts_for_decisions import PostAnalyzer
from ai_recategorize_posts import PostRecategorizer
from multi_site_seo_analyzer import MultiSiteSEOAnalyzer
from analytics_importer import AnalyticsImporter
from content_gap_analyzer import ContentGapAnalyzer
from opportunity_analyzer import OpportunityAnalyzer
from report_generator import ReportGenerator
from category_manager import CategoryManager
from user_approval import UserApprovalSystem
def main():
    """Main CLI entry point"""
    parser = argparse.ArgumentParser(
        prog='seo',
        description='SEO Automation CLI - Manage WordPress SEO with AI',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
seo help # Show this help
seo export # Export all posts from WordPress sites
seo analyze # Analyze posts with AI for recommendations
seo recategorize # Recategorize posts with AI
seo seo_check # Check SEO quality of titles/descriptions
seo categories # Manage categories across sites
seo approve # Review and approve recommendations
seo full_pipeline # Run complete workflow: export → analyze → seo_check
seo analytics ga4.csv gsc.csv # Import analytics data
"""
    )
    # Positional command plus free-form trailing arguments handed to the handler.
    parser.add_argument('command', nargs='?', help='Command to run')
    parser.add_argument('args', nargs='*', help='Arguments for the command')
    # Global options
    parser.add_argument('--verbose', '-v', action='store_true', help='Verbose output')
    parser.add_argument('--dry-run', action='store_true', help='Show what would be done without doing it')
    args = parser.parse_args()
    # No command given: print usage, exit successfully.
    if not args.command:
        parser.print_help()
        return 0
    # Validate configuration before dispatching any command.
    try:
        Config.validate()
    except ValueError as e:
        print(f"❌ Configuration error: {e}")
        return 1
    # Route to appropriate command.
    # Dispatch table: every handler shares the (args, verbose=, dry_run=) signature.
    command_map = {
        'help': show_help,
        'export': export_posts,
        'analyze': analyze_posts,
        'recategorize': recategorize_posts,
        'seo_check': seo_check,
        'categories': manage_categories,
        'approve': approve_recommendations,
        'full_pipeline': run_full_pipeline,
        'analytics': import_analytics,
        'gaps': analyze_content_gaps,
        'opportunities': analyze_opportunities,
        'report': generate_report,
        'status': show_status,
    }
    if args.command not in command_map:
        print(f"❌ Unknown command: {args.command}")
        print("\nAvailable commands:")
        for cmd in sorted(command_map.keys()):
            print(f" {cmd}")
        return 1
    # Execute the command; handlers return the process exit code.
    try:
        return command_map[args.command](args.args, verbose=args.verbose, dry_run=args.dry_run)
    except KeyboardInterrupt:
        print("\n⚠ Operation cancelled by user")
        return 1
    except Exception as e:
        # Catch-all at the CLI boundary: report and exit non-zero; full
        # traceback only when --verbose was requested.
        print(f"❌ Error running command '{args.command}': {e}")
        if args.verbose:
            import traceback
            traceback.print_exc()
        return 1
def show_help(args, verbose=False, dry_run=False):
    """Show help message.

    Args are accepted (and ignored) so the function matches the uniform
    (args, verbose=, dry_run=) handler signature used by the dispatch table.
    Always returns 0.
    """
    print("""
SEO Automation CLI - Available Commands
Basic Commands:
export Export all posts from WordPress sites
analyze Analyze posts with AI for recommendations
recategorize Recategorize posts with AI suggestions
seo_check Check SEO quality of titles/descriptions
categories Manage categories across all sites
approve Review and approve recommendations
full_pipeline Run complete workflow: export → analyze → seo_check
Advanced Commands:
analytics <ga_file> <gsc_file> Import analytics data
gaps Analyze content gaps
opportunities Analyze keyword opportunities
report Generate SEO optimization report
status Show output files status
Utility:
help Show this help message
Examples:
seo export
seo analyze
seo full_pipeline
seo analytics ga4.csv gsc.csv
""")
    return 0
def export_posts(args, verbose=False, dry_run=False):
    """Export all posts from WordPress sites.

    Returns 0 always; in dry-run mode only announces the action.
    """
    if dry_run:
        print("Would export all posts from WordPress sites")
    else:
        print("📦 Exporting all posts from WordPress sites...")
        PostExporter().run()
        print("✅ Export completed!")
    return 0
def analyze_posts(args, verbose=False, dry_run=False):
    """Analyze posts with AI for recommendations.

    Uses the CSV given as the first positional argument, or falls back to
    the newest all_posts_*.csv under output/. Returns 0 on success, 1 when
    no input CSV can be found.
    """
    if dry_run:
        print("Would analyze posts with AI for recommendations")
        return 0
    print("🤖 Analyzing posts with AI for recommendations...")
    # Resolve the CSV to analyze: explicit argument wins over auto-detection.
    if args:
        csv_file = args[0]
    else:
        exports = list((Path(__file__).parent / 'output').glob('all_posts_*.csv'))
        if not exports:
            print("❌ No exported posts found. Run 'seo export' first or provide a CSV file.")
            print(" Usage: seo analyze <csv_file>")
            return 1
        # Newest export by creation time.
        csv_file = str(max(exports, key=os.path.getctime))
    print(f"Using file: {csv_file}")
    PostAnalyzer(csv_file).run()
    print("✅ AI analysis completed!")
    return 0
def recategorize_posts(args, verbose=False, dry_run=False):
    """Recategorize posts with AI suggestions.

    Uses the CSV given as the first positional argument, or falls back to
    the newest all_posts_*.csv under output/. Returns 0 on success, 1 when
    no input CSV can be found.
    """
    if dry_run:
        print("Would recategorize posts with AI suggestions")
        return 0
    print("🏷️ Recategorizing posts with AI suggestions...")
    # Resolve the CSV to recategorize: explicit argument wins over auto-detection.
    if args:
        csv_file = args[0]
    else:
        exports = list((Path(__file__).parent / 'output').glob('all_posts_*.csv'))
        if not exports:
            print("❌ No exported posts found. Run 'seo export' first or provide a CSV file.")
            print(" Usage: seo recategorize <csv_file>")
            return 1
        # Newest export by creation time.
        csv_file = str(max(exports, key=os.path.getctime))
    print(f"Using file: {csv_file}")
    PostRecategorizer(csv_file).run()
    print("✅ Recategorization completed!")
    return 0
def seo_check(args, verbose=False, dry_run=False):
    """Check SEO quality of titles/descriptions.

    Args:
        args: optional CLI arguments; supports '--top-n=<int>' to control
            how many top posts get AI recommendations (default 10).
        verbose: accepted for the uniform handler signature; unused here.
        dry_run: announce the action and exit without running.

    Returns:
        0 on success, 1 on an invalid --top-n value.
    """
    if dry_run:
        print("Would check SEO quality of titles/descriptions")
        return 0
    print("🔍 Checking SEO quality of titles/descriptions...")
    # Parse optional arguments.
    top_n = 10  # Default
    for arg in args:
        # Bug fix: the old condition (`startswith('--top-n=') or '=' in arg`)
        # treated ANY key=value argument as a top-n override; only the
        # documented '--top-n=<int>' form is accepted now.
        if arg.startswith('--top-n='):
            try:
                top_n = int(arg.split('=', 1)[1])
            except ValueError:
                print(f"❌ Invalid top-n value: {arg}")
                return 1
    analyzer = MultiSiteSEOAnalyzer()
    analyzer.run(use_ai=True, top_n=top_n)
    print("✅ SEO check completed!")
    return 0
def manage_categories(args, verbose=False, dry_run=False):
    """Manage categories across all sites.

    Returns 0 always; in dry-run mode only announces the action.
    """
    if dry_run:
        print("Would manage categories across all sites")
    else:
        print("🗂️ Managing categories across all sites...")
        CategoryManager().run()
        print("✅ Category management completed!")
    return 0
def approve_recommendations(args, verbose=False, dry_run=False):
    """Review and approve recommendations.

    Reviews the CSV files passed as arguments, or auto-detects recommendation
    CSVs under output/. Returns 0 on success, 1 when nothing can be reviewed.
    """
    if dry_run:
        print("Would review and approve recommendations")
        return 0
    print("✅ Reviewing and approving recommendations...")
    # Collect the recommendation CSVs to review.
    if args:
        # Keep only the provided paths that actually exist.
        candidates = [Path(name) for name in args if Path(name).exists()]
        if not candidates:
            print("❌ None of the provided files exist.")
            return 1
    else:
        # Auto-detect recommendation files under output/.
        output_dir = Path(__file__).parent / 'output'
        candidates = []
        for pattern in ('category_assignments_*.csv',
                        'posts_with_ai_recommendations_*.csv',
                        'posts_to_move_*.csv',
                        'posts_to_consolidate_*.csv',
                        'posts_to_delete_*.csv'):
            candidates.extend(output_dir.glob(pattern))
        if not candidates:
            print("❌ No recommendation files found. Run 'seo analyze' or 'seo categories' first.")
            print(" Or provide a CSV file: seo approve <file1.csv> [file2.csv] ...")
            return 1
    print(f"Found {len(candidates)} recommendation files to review:")
    for candidate in candidates:
        print(f" - {candidate.name}")
    approval_system = UserApprovalSystem()
    approval_system.run_interactive_approval([str(c) for c in candidates])
    print("✅ Approval process completed!")
    return 0
def run_full_pipeline(args, verbose=False, dry_run=False):
    """Run complete workflow: export → analyze → seo_check.

    Returns 0 always; the analyze step is skipped silently when the export
    produced no all_posts_*.csv file.
    """
    if dry_run:
        print("Would run full pipeline: export → analyze → seo_check")
        return 0
    print("🚀 Running full SEO automation pipeline...")
    # Step 1: export every site's posts.
    print("\n📦 Step 1/3: Exporting posts...")
    PostExporter().run()
    # Step 2: analyze the freshest export, if any.
    print("\n🤖 Step 2/3: Analyzing with AI...")
    exports = list((Path(__file__).parent / 'output').glob('all_posts_*.csv'))
    if exports:
        newest = max(exports, key=os.path.getctime)
        PostAnalyzer(str(newest)).run()
    # Step 3: SEO quality check with AI suggestions for the top 10 posts.
    print("\n🔍 Step 3/3: Checking SEO quality...")
    MultiSiteSEOAnalyzer().run(use_ai=True, top_n=10)
    print("\n✅ Full pipeline completed!")
    return 0
def import_analytics(args, verbose=False, dry_run=False):
    """Import analytics data.

    Expects two positional arguments (GA4 CSV, GSC CSV) and joins them onto
    the most recent posts export. Returns 0 on success, 1 on usage error or
    when no export exists.
    """
    if dry_run:
        print("Would import analytics data")
        return 0
    # Both analytics files are mandatory.
    if len(args) < 2:
        print("❌ Usage: seo analytics <ga_file> <gsc_file>")
        return 1
    ga_file, gsc_file = args[0], args[1]
    # The importer enriches the newest posts export with analytics columns.
    output_dir = Path(__file__).parent / 'output'
    posts_files = list(output_dir.glob('all_posts_*.csv'))
    if not posts_files:
        print("❌ No exported posts found. Run 'seo export' first.")
        return 1
    latest_posts = max(posts_files, key=os.path.getctime)
    print("📊 Importing analytics data...")
    print(f"GA4 file: {ga_file}")
    print(f"GSC file: {gsc_file}")
    print(f"Posts file: {latest_posts.name}")
    AnalyticsImporter().run(
        ga_csv=Path(ga_file),
        gsc_csv=Path(gsc_file),
        posts_csv=latest_posts,
        output_csv=output_dir / 'posts_with_analytics.csv'
    )
    print("✅ Analytics import completed!")
    return 0
def analyze_content_gaps(args, verbose=False, dry_run=False):
    """Analyze content gaps.

    Requires output/results/posts_with_analytics.csv (from 'seo analytics')
    and a GSC queries CSV. Returns 0 on success, 1 when inputs are missing.
    """
    if dry_run:
        print("Would analyze content gaps")
        return 0
    print("🕳️ Analyzing content gaps...")
    base_dir = Path(__file__).parent
    output_dir = base_dir / 'output'
    # Analytics-enriched posts are a hard prerequisite.
    posts_file = output_dir / 'results' / 'posts_with_analytics.csv'
    if not posts_file.exists():
        print("❌ Posts with analytics not found. Run 'seo analytics' first.")
        return 1
    # Prefer the raw GSC export; fall back to a pre-extracted queries CSV.
    gsc_file = base_dir / 'input' / 'analytics' / 'gsc' / 'Requêtes.csv'
    if not gsc_file.exists():
        gsc_file = output_dir / 'gsc_queries.csv'  # fallback
        if not gsc_file.exists():
            print("❌ GSC queries file not found. Expected at input/analytics/gsc/Requêtes.csv")
            return 1
    ContentGapAnalyzer().run(
        posts_csv=posts_file,
        gsc_csv=gsc_file,
        output_csv=output_dir / 'results' / 'content_gaps.csv'
    )
    print("✅ Content gap analysis completed!")
    return 0
def analyze_opportunities(args, verbose=False, dry_run=False):
    """Analyze keyword opportunities.

    Requires output/results/posts_with_analytics.csv (from 'seo analytics').
    Returns 0 on success, 1 when the input is missing.
    """
    if dry_run:
        print("Would analyze keyword opportunities")
        return 0
    print("🎯 Analyzing keyword opportunities...")
    # Analytics-enriched posts are a hard prerequisite.
    results_dir = Path(__file__).parent / 'output' / 'results'
    posts_csv = results_dir / 'posts_with_analytics.csv'
    if not posts_csv.exists():
        print("❌ Posts with analytics not found. Run 'seo analytics' first.")
        return 1
    OpportunityAnalyzer().run(
        posts_csv=posts_csv,
        output_csv=results_dir / 'keyword_opportunities.csv'
    )
    print("✅ Opportunity analysis completed!")
    return 0
def generate_report(args, verbose=False, dry_run=False):
    """Generate SEO optimization report.

    Builds a markdown report plus a prioritized CSV from the analytics,
    opportunities and gaps outputs. Returns 0 on success, 1 when the
    analytics-enriched posts file is missing.
    """
    if dry_run:
        print("Would generate SEO optimization report")
        return 0
    print("📋 Generating SEO optimization report...")
    results_dir = Path(__file__).parent / 'output' / 'results'
    posts_csv = results_dir / 'posts_with_analytics.csv'
    # Only the posts file is mandatory; the generator handles the rest.
    if not posts_csv.exists():
        print("❌ Posts with analytics not found. Run 'seo analytics' first.")
        return 1
    ReportGenerator().run(
        posts_csv=posts_csv,
        opportunities_csv=results_dir / 'keyword_opportunities.csv',
        gaps_csv=results_dir / 'content_gaps.csv',
        output_md=results_dir / 'seo_optimization_report.md',
        output_prioritized_csv=results_dir / 'posts_prioritized.csv'
    )
    print("✅ Report generation completed!")
    return 0
def show_status(args, verbose=False, dry_run=False):
    """Show output files status.

    Lists the 10 most recently created CSV files under output/ with their
    size and modification time. Always returns 0; a missing directory or an
    empty listing is reported, not treated as an error.
    """
    if dry_run:
        print("Would show output files status")
        return 0
    # Hoisted out of the loop: the original re-imported datetime per file.
    from datetime import datetime
    print("📊 Output files status:")
    output_dir = Path(__file__).parent / 'output'
    if not output_dir.exists():
        print(" output/ directory not found")
        return 0
    files = list(output_dir.glob('*.csv'))
    if not files:
        print(" No CSV files found in output/")
        return 0
    print(f"\nFound {len(files)} CSV files in output/:")
    # Show latest 10 by creation time, newest first.
    for file in sorted(files, key=os.path.getctime, reverse=True)[:10]:
        stat = file.stat()
        size = stat.st_size / 1024  # KB
        date = datetime.fromtimestamp(stat.st_mtime).strftime('%Y-%m-%d %H:%M')
        print(f" {file.name} ({size:.1f}KB, {date})")
    return 0
# Add src to path
# Legacy entry point: the implementation now lives in the src/seo package;
# this shim only wires sys.path so 'seo' is importable, then delegates.
src_dir = Path(__file__).parent / 'src'
sys.path.insert(0, str(src_dir))
# Import and run CLI
from seo.cli import main
if __name__ == '__main__':
    # Propagate the CLI's return code as the process exit status.
    sys.exit(main())

37
setup.cfg Normal file
View File

@@ -0,0 +1,37 @@
[metadata]
name = seo-automation
version = 1.0.0
description = WordPress SEO automation with AI-powered recommendations
long_description = file: README.md
long_description_content_type = text/markdown
license = MIT
author = SEO Automation Team
url = https://github.com/example/seo-automation
classifiers =
Development Status :: 4 - Beta
Intended Audience :: Developers
Topic :: Internet :: WWW/HTTP
License :: OSI Approved :: MIT License
Programming Language :: Python :: 3
Programming Language :: Python :: 3.8
Programming Language :: Python :: 3.9
Programming Language :: Python :: 3.10
Programming Language :: Python :: 3.11
Programming Language :: Python :: 3.12
[options]
package_dir =
= src
packages = find:
python_requires = >=3.8
install_requires =
requests>=2.31.0
python-dotenv>=1.0.0
PyYAML>=6.0
[options.packages.find]
where = src
[options.entry_points]
console_scripts =
seo = seo.cli:main

55
setup.py Normal file
View File

@@ -0,0 +1,55 @@
#!/usr/bin/env python3
"""
Setup script for SEO Automation Tool

Mirrors the declarative metadata in setup.cfg; keep the two in sync.
"""
from setuptools import setup, find_packages
from pathlib import Path
# Read README for long description (empty string if the file is absent).
readme_path = Path(__file__).parent / 'README.md'
long_description = readme_path.read_text(encoding='utf-8') if readme_path.exists() else ''
# Read requirements: non-empty, non-comment lines become install_requires.
requirements_path = Path(__file__).parent / 'requirements.txt'
requirements = [
    line.strip()
    for line in requirements_path.read_text().splitlines()
    if line.strip() and not line.startswith('#')
] if requirements_path.exists() else []
setup(
    name='seo-automation',
    version='1.0.0',
    author='SEO Automation Team',
    author_email='seo@example.com',
    description='WordPress SEO automation with AI-powered recommendations',
    long_description=long_description,
    long_description_content_type='text/markdown',
    url='https://github.com/example/seo-automation',
    # src layout: packages live under src/, imported as 'seo'.
    packages=find_packages(where='src'),
    package_dir={'': 'src'},
    classifiers=[
        'Development Status :: 4 - Beta',
        'Intended Audience :: Developers',
        'Topic :: Internet :: WWW/HTTP',
        'License :: OSI Approved :: MIT License',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.8',
        'Programming Language :: Python :: 3.9',
        'Programming Language :: Python :: 3.10',
        'Programming Language :: Python :: 3.11',
        'Programming Language :: Python :: 3.12',
    ],
    python_requires='>=3.8',
    install_requires=requirements,
    # Installs the 'seo' console command, mapped to seo.cli:main.
    entry_points={
        'console_scripts': [
            'seo=seo.cli:main',
        ],
    },
    include_package_data=True,
    package_data={
        'seo': ['py.typed'],
    },
)

7
src/seo/__init__.py Normal file
View File

@@ -0,0 +1,7 @@
"""
SEO Automation Tool - Integrated Application
A comprehensive WordPress SEO automation suite.
"""
__version__ = '1.0.0'
__author__ = 'SEO Automation Team'

14
src/seo/analyzer.py Normal file
View File

@@ -0,0 +1,14 @@
"""
Analyzer Module - AI-powered post analysis
"""
import sys
from pathlib import Path
# Import from scripts directory
scripts_dir = Path(__file__).parent.parent.parent / 'scripts'
sys.path.insert(0, str(scripts_dir))
from ai_analyze_posts_for_decisions import PostAnalyzer
__all__ = ['PostAnalyzer']

255
src/seo/app.py Normal file
View File

@@ -0,0 +1,255 @@
"""
SEO Application Core - Integrated SEO automation functionality
"""
import logging
from pathlib import Path
from datetime import datetime
from typing import Optional, List
from .exporter import PostExporter
from .analyzer import PostAnalyzer
from .recategorizer import PostRecategorizer
from .seo_checker import MultiSiteSEOAnalyzer
from .categories import CategoryManager
from .approval import UserApprovalSystem
logger = logging.getLogger(__name__)
class SEOApp:
    """
    Main SEO Application class.
    Provides a unified interface for all SEO automation tasks.
    Inspired by Ruby on Rails' Active Record pattern.
    Usage:
        app = SEOApp()
        app.export()
        app.analyze()
        app.seo_check()
    """
    def __init__(self, verbose: bool = False):
        """
        Initialize the SEO application.
        Args:
            verbose: Enable verbose logging
        """
        self.verbose = verbose
        # output/ sits at the project root (three levels above src/seo/app.py).
        self.output_dir = Path(__file__).parent.parent.parent / 'output'
        self.output_dir.mkdir(parents=True, exist_ok=True)
        # Components are created lazily by the methods below; each attribute
        # holds the most recently used instance (None until first use).
        self.exporter = None
        self.analyzer = None
        self.recategorizer = None
        self.seo_checker = None
        self.category_manager = None
        self.approval_system = None
        if verbose:
            logging.basicConfig(level=logging.DEBUG)
        else:
            logging.basicConfig(level=logging.INFO)
    def export(self) -> str:
        """
        Export all posts from WordPress sites.
        Returns:
            Path to exported CSV file.
            NOTE(review): the path is reconstructed from today's date here —
            assumes PostExporter writes output/all_posts_<YYYY-MM-DD>.csv;
            confirm against the exporter implementation.
        """
        logger.info("📦 Exporting all posts from WordPress sites...")
        self.exporter = PostExporter()
        self.exporter.run()
        # Get the exported file path
        date_str = datetime.now().strftime('%Y-%m-%d')
        csv_file = self.output_dir / f'all_posts_{date_str}.csv'
        logger.info(f"✅ Export completed: {csv_file}")
        return str(csv_file)
    def analyze(self, csv_file: Optional[str] = None) -> str:
        """
        Analyze posts with AI for recommendations.
        Args:
            csv_file: Path to CSV file (uses latest export if not provided)
        Returns:
            The CSV file that was analyzed (PostAnalyzer writes its own
            output files as a side effect).
        Raises:
            FileNotFoundError: when no CSV is given and no export exists.
        """
        logger.info("🤖 Analyzing posts with AI for recommendations...")
        # Find CSV file
        if not csv_file:
            csv_file = self._find_latest_export()
            if not csv_file:
                raise FileNotFoundError("No exported posts found. Run export() first or provide a CSV file.")
        logger.info(f"Using file: {csv_file}")
        # Run analysis
        self.analyzer = PostAnalyzer(csv_file)
        self.analyzer.run()
        logger.info("✅ AI analysis completed!")
        return csv_file
    def recategorize(self, csv_file: Optional[str] = None) -> str:
        """
        Recategorize posts with AI suggestions.
        Args:
            csv_file: Path to CSV file (uses latest export if not provided)
        Returns:
            The CSV file that was recategorized (PostRecategorizer writes its
            own output files as a side effect).
        Raises:
            FileNotFoundError: when no CSV is given and no export exists.
        """
        logger.info("🏷️ Recategorizing posts with AI suggestions...")
        # Find CSV file
        if not csv_file:
            csv_file = self._find_latest_export()
            if not csv_file:
                raise FileNotFoundError("No exported posts found. Run export() first or provide a CSV file.")
        logger.info(f"Using file: {csv_file}")
        # Run recategorization
        self.recategorizer = PostRecategorizer(csv_file)
        self.recategorizer.run()
        logger.info("✅ Recategorization completed!")
        return csv_file
    def seo_check(self, top_n: int = 10) -> None:
        """
        Check SEO quality of titles and descriptions.
        Args:
            top_n: Number of top posts to get AI recommendations for
        """
        logger.info("🔍 Checking SEO quality of titles/descriptions...")
        self.seo_checker = MultiSiteSEOAnalyzer()
        self.seo_checker.run(use_ai=True, top_n=top_n)
        logger.info("✅ SEO check completed!")
    def categories(self) -> None:
        """Manage categories across all sites."""
        logger.info("🗂️ Managing categories across all sites...")
        self.category_manager = CategoryManager()
        self.category_manager.run()
        logger.info("✅ Category management completed!")
    def approve(self, files: Optional[List[str]] = None) -> None:
        """
        Review and approve recommendations.
        Args:
            files: List of CSV files to review (auto-detects if not provided)
        Raises:
            FileNotFoundError: when no files are given and none are found.
        """
        logger.info("✅ Reviewing and approving recommendations...")
        self.approval_system = UserApprovalSystem()
        if not files:
            # Auto-detect recommendation files
            files = self._find_recommendation_files()
            if not files:
                raise FileNotFoundError("No recommendation files found. Run analyze() or categories() first.")
        logger.info(f"Found {len(files)} recommendation files to review")
        self.approval_system.run_interactive_approval(files)
        logger.info("✅ Approval process completed!")
    def full_pipeline(self) -> None:
        """
        Run complete workflow: export → analyze → seo_check
        """
        logger.info("🚀 Running full SEO automation pipeline...")
        # Step 1: Export
        logger.info("\n📦 Step 1/3: Exporting posts...")
        self.export()
        # Step 2: Analyze (picks up the export via _find_latest_export)
        logger.info("\n🤖 Step 2/3: Analyzing with AI...")
        self.analyze()
        # Step 3: SEO Check
        logger.info("\n🔍 Step 3/3: Checking SEO quality...")
        self.seo_check()
        logger.info("\n✅ Full pipeline completed!")
    def _find_latest_export(self) -> Optional[str]:
        """
        Find the latest exported CSV file (by inode change/creation time).
        Returns:
            Path to latest CSV file or None if not found
        """
        csv_files = list(self.output_dir.glob('all_posts_*.csv'))
        if not csv_files:
            return None
        latest = max(csv_files, key=lambda f: f.stat().st_ctime)
        return str(latest)
    def _find_recommendation_files(self) -> List[str]:
        """
        Find recommendation files in output directory.
        Returns:
            List of paths to recommendation files
        """
        patterns = [
            'category_assignments_*.csv',
            'posts_with_ai_recommendations_*.csv',
            'posts_to_move_*.csv',
            'posts_to_consolidate_*.csv',
            'posts_to_delete_*.csv'
        ]
        files = []
        for pattern in patterns:
            files.extend(self.output_dir.glob(pattern))
        return [str(f) for f in files]
    def status(self) -> dict:
        """
        Get status of output files.
        Returns:
            Dictionary with keys 'total_files' (int) and 'files' (list of
            dicts with 'name', 'size_kb', 'modified'); only the 10 newest
            files are listed.
        """
        files = list(self.output_dir.glob('*.csv'))
        status_info = {
            'total_files': len(files),
            'files': []
        }
        for file in sorted(files, key=lambda f: f.stat().st_ctime, reverse=True)[:10]:
            status_info['files'].append({
                'name': file.name,
                'size_kb': file.stat().st_size / 1024,
                'modified': datetime.fromtimestamp(file.stat().st_mtime).strftime('%Y-%m-%d %H:%M')
            })
        return status_info

14
src/seo/approval.py Normal file
View File

@@ -0,0 +1,14 @@
"""
Approval System Module - User approval for recommendations
"""
import sys
from pathlib import Path
# Import from scripts directory
scripts_dir = Path(__file__).parent.parent.parent / 'scripts'
sys.path.insert(0, str(scripts_dir))
from user_approval import UserApprovalSystem
__all__ = ['UserApprovalSystem']

14
src/seo/categories.py Normal file
View File

@@ -0,0 +1,14 @@
"""
Category Manager Module - Category management across sites
"""
import sys
from pathlib import Path
# Import from scripts directory
scripts_dir = Path(__file__).parent.parent.parent / 'scripts'
sys.path.insert(0, str(scripts_dir))
from category_manager import CategoryManager
__all__ = ['CategoryManager']

223
src/seo/cli.py Normal file
View File

@@ -0,0 +1,223 @@
#!/usr/bin/env python3
"""
SEO Automation CLI - Main entry point
Unified command-line interface for SEO automation.
"""
import sys
import argparse
from pathlib import Path
# Make the package importable when this file is run as a plain script.
# Bug fix: this file lives at src/seo/cli.py, so the directory that must be
# on sys.path is src/ (two levels up); the old `parent / 'src'` pointed at
# the nonexistent src/seo/src. Harmless when installed via the entry point,
# required when invoked directly.
src_dir = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(src_dir))
from seo.app import SEOApp
from seo.config import Config
def main():
    """Main CLI entry point.

    Parses the command line, validates configuration, then dispatches to a
    cmd_* handler. Returns the process exit code (0 success, 1 failure).
    """
    parser = argparse.ArgumentParser(
        prog='seo',
        description='SEO Automation CLI - Manage WordPress SEO with AI',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
seo export Export all posts from WordPress sites
seo analyze Analyze posts with AI for recommendations
seo analyze posts.csv Analyze specific CSV file
seo recategorize Recategorize posts with AI
seo seo_check Check SEO quality of titles/descriptions
seo categories Manage categories across sites
seo approve Review and approve recommendations
seo full_pipeline Run complete workflow: export → analyze → seo_check
seo status Show output files status
"""
    )
    # Positional command plus free-form trailing arguments for the handler.
    parser.add_argument('command', nargs='?', help='Command to run')
    parser.add_argument('args', nargs='*', help='Arguments for the command')
    parser.add_argument('--verbose', '-v', action='store_true', help='Verbose output')
    parser.add_argument('--dry-run', action='store_true', help='Show what would be done')
    parser.add_argument('--top-n', type=int, default=10, help='Number of top posts for AI analysis')
    args = parser.parse_args()
    # No command given: print usage, exit successfully.
    if not args.command:
        parser.print_help()
        return 0
    # Validate configuration before doing any work.
    try:
        Config.validate()
    except ValueError as e:
        print(f"❌ Configuration error: {e}")
        return 1
    # Create application instance
    app = SEOApp(verbose=args.verbose)
    # Route to appropriate command.
    # Dispatch table: every cmd_* handler takes (app, parsed_args).
    commands = {
        'export': cmd_export,
        'analyze': cmd_analyze,
        'recategorize': cmd_recategorize,
        'seo_check': cmd_seo_check,
        'categories': cmd_categories,
        'approve': cmd_approve,
        'full_pipeline': cmd_full_pipeline,
        'status': cmd_status,
        'help': cmd_help,
    }
    if args.command not in commands:
        print(f"❌ Unknown command: {args.command}")
        print("\nAvailable commands:")
        for cmd in sorted(commands.keys()):
            print(f" {cmd}")
        return 1
    try:
        return commands[args.command](app, args)
    except KeyboardInterrupt:
        print("\n⚠️ Operation cancelled by user")
        return 1
    except FileNotFoundError as e:
        # SEOApp raises FileNotFoundError for missing prerequisite files.
        print(f"❌ File not found: {e}")
        return 1
    except Exception as e:
        # Catch-all at the CLI boundary; traceback only with --verbose.
        print(f"❌ Error: {e}")
        if args.verbose:
            import traceback
            traceback.print_exc()
        return 1
def cmd_export(app, args):
    """Export all posts (or announce the action under --dry-run)."""
    if args.dry_run:
        print("Would export all posts from WordPress sites")
    else:
        app.export()
    return 0
def cmd_analyze(app, args):
    """Analyze posts with AI; first trailing argument selects the CSV."""
    if args.dry_run:
        print("Would analyze posts with AI for recommendations")
        return 0
    target = args.args[0] if args.args else None
    app.analyze(target)
    return 0
def cmd_recategorize(app, args):
    """Recategorize posts with AI; first trailing argument selects the CSV."""
    if args.dry_run:
        print("Would recategorize posts with AI suggestions")
        return 0
    target = args.args[0] if args.args else None
    app.recategorize(target)
    return 0
def cmd_seo_check(app, args):
    """Check SEO quality; --top-n controls the AI recommendation count."""
    if args.dry_run:
        print("Would check SEO quality of titles/descriptions")
    else:
        app.seo_check(top_n=args.top_n)
    return 0
def cmd_categories(app, args):
    """Manage categories (or announce the action under --dry-run)."""
    if args.dry_run:
        print("Would manage categories across all sites")
    else:
        app.categories()
    return 0
def cmd_approve(app, args):
    """Approve recommendations; trailing arguments select the CSV files."""
    if args.dry_run:
        print("Would review and approve recommendations")
        return 0
    selected = args.args if args.args else None
    app.approve(selected)
    return 0
def cmd_full_pipeline(app, args):
    """Run the full pipeline (or announce the action under --dry-run)."""
    if args.dry_run:
        print("Would run full pipeline: export → analyze → seo_check")
    else:
        app.full_pipeline()
    return 0
def cmd_status(app, args):
    """Print the output-files report produced by app.status()."""
    if args.dry_run:
        print("Would show output files status")
        return 0
    info = app.status()
    print("📊 Output files status:")
    if info['total_files'] > 0:
        print(f"\nFound {info['total_files']} CSV files in output/:")
        for entry in info['files']:
            print(f" {entry['name']} ({entry['size_kb']:.1f}KB, {entry['modified']})")
    else:
        print(" No CSV files found in output/")
    return 0
def cmd_help(app, args):
    """Show help.

    Both parameters are accepted (and ignored) so the function matches the
    uniform (app, args) handler signature. Always returns 0.
    """
    print("""
SEO Automation CLI - Available Commands
Basic Commands:
export Export all posts from WordPress sites
analyze [csv_file] Analyze posts with AI (optional CSV input)
recategorize [csv_file] Recategorize posts with AI (optional CSV input)
seo_check Check SEO quality of titles/descriptions
categories Manage categories across all sites
approve [files...] Review and approve recommendations
full_pipeline Run complete workflow: export → analyze → seo_check
Utility:
status Show output files status
help Show this help message
Options:
--verbose, -v Enable verbose logging
--dry-run Show what would be done without doing it
--top-n N Number of top posts for AI analysis (default: 10)
Examples:
seo export
seo analyze
seo analyze output/all_posts_2026-02-16.csv
seo approve output/category_assignments_*.csv
seo full_pipeline
seo status
""")
    return 0
if __name__ == '__main__':
    # Propagate the CLI's return code as the process exit status.
    sys.exit(main())

70
src/seo/config.py Normal file
View File

@@ -0,0 +1,70 @@
"""
Configuration module for SEO application.
Loads configuration from environment variables and YAML.
"""
import os
import yaml
from dotenv import load_dotenv
from pathlib import Path
load_dotenv()
class Config:
    """Configuration class for SEO automation.

    All values are resolved once, at class-definition time, with this
    precedence: environment variables (loaded via python-dotenv) first,
    then config.yaml, then hard-coded fallbacks.
    """
    # Bug fix: config.yaml lives at the project root, three levels above
    # this file (src/seo/config.py -> seo -> src -> root). The previous
    # `parent.parent` pointed inside src/, where the file never exists,
    # silently disabling all YAML-based configuration.
    CONFIG_FILE = Path(__file__).parent.parent.parent / 'config.yaml'
    if CONFIG_FILE.exists():
        with open(CONFIG_FILE, 'r', encoding='utf-8') as f:
            # `or {}`: safe_load returns None for an empty file, which would
            # break every .get() below.
            YAML_CONFIG = yaml.safe_load(f) or {}
    else:
        YAML_CONFIG = {}
    # WordPress Settings (primary site)
    WORDPRESS_URL = os.getenv('WORDPRESS_URL', YAML_CONFIG.get('primary_site', {}).get('url', '')).rstrip('/')
    WORDPRESS_USERNAME = os.getenv('WORDPRESS_USERNAME', YAML_CONFIG.get('primary_site', {}).get('username', ''))
    WORDPRESS_APP_PASSWORD = os.getenv('WORDPRESS_APP_PASSWORD', YAML_CONFIG.get('primary_site', {}).get('password', ''))
    # Multi-site Configuration: per-site env vars override, falling back to
    # the shared primary-site credentials.
    WORDPRESS_SITES = {
        'mistergeek.net': {
            'url': os.getenv('WORDPRESS_MISTERGEEK_URL', YAML_CONFIG.get('wordpress_sites', {}).get('mistergeek.net', {}).get('url', 'https://www.mistergeek.net')),
            'username': os.getenv('WORDPRESS_MISTERGEEK_USERNAME', os.getenv('WORDPRESS_USERNAME', '')),
            'password': os.getenv('WORDPRESS_MISTERGEEK_PASSWORD', os.getenv('WORDPRESS_APP_PASSWORD', '')),
        },
        'webscroll.fr': {
            'url': os.getenv('WORDPRESS_WEBSCROLL_URL', YAML_CONFIG.get('wordpress_sites', {}).get('webscroll.fr', {}).get('url', 'https://www.webscroll.fr')),
            'username': os.getenv('WORDPRESS_WEBSCROLL_USERNAME', os.getenv('WORDPRESS_USERNAME', '')),
            'password': os.getenv('WORDPRESS_WEBSCROLL_PASSWORD', os.getenv('WORDPRESS_APP_PASSWORD', '')),
        },
        'hellogeek.net': {
            'url': os.getenv('WORDPRESS_HELLOGEEK_URL', YAML_CONFIG.get('wordpress_sites', {}).get('hellogeek.net', {}).get('url', 'https://www.hellogeek.net')),
            'username': os.getenv('WORDPRESS_HELLOGEEK_USERNAME', os.getenv('WORDPRESS_USERNAME', '')),
            'password': os.getenv('WORDPRESS_HELLOGEEK_PASSWORD', os.getenv('WORDPRESS_APP_PASSWORD', '')),
        }
    }
    # OpenRouter API Settings
    OPENROUTER_API_KEY = os.getenv('OPENROUTER_API_KEY', YAML_CONFIG.get('ai_model', {}).get('api_key', ''))
    AI_MODEL = os.getenv('AI_MODEL', YAML_CONFIG.get('ai_model', {}).get('name', 'anthropic/claude-3.5-sonnet'))
    @classmethod
    def validate(cls):
        """Validate configuration.

        Collects every missing required value and raises a single
        ValueError listing them all; returns True when valid.
        """
        errors = []
        if not cls.WORDPRESS_URL:
            errors.append("WORDPRESS_URL is required")
        if not cls.WORDPRESS_USERNAME:
            errors.append("WORDPRESS_USERNAME is required")
        if not cls.WORDPRESS_APP_PASSWORD:
            errors.append("WORDPRESS_APP_PASSWORD is required")
        if not cls.OPENROUTER_API_KEY:
            errors.append("OPENROUTER_API_KEY is required")
        if errors:
            raise ValueError("Configuration errors:\n" + "\n".join(f" - {e}" for e in errors))
        return True

226
src/seo/exporter.py Normal file
View File

@@ -0,0 +1,226 @@
"""
Post Exporter Module - Export posts from WordPress sites
"""
import csv
import logging
import time
from pathlib import Path
from datetime import datetime
from typing import Dict, List, Optional
import requests
from requests.auth import HTTPBasicAuth
import re
from .config import Config
logger = logging.getLogger(__name__)
class PostExporter:
    """Export posts from all configured WordPress sites to a single CSV.

    Workflow (see run()): fetch category names per site, page through
    the WP REST API for published and draft posts, flatten each post
    into a CSV row, and write the combined file under ./output/.
    """

    def __init__(self):
        """Initialize the exporter from the package configuration."""
        # site name -> {'url', 'username', 'password'}
        self.sites = Config.WORDPRESS_SITES
        # Flattened CSV rows accumulated across all sites.
        self.all_posts = []
        # site name -> {category_id: {'name': ..., 'slug': ...}}
        self.category_cache = {}

    def fetch_category_names(self, site_name: str, site_config: Dict) -> Dict[int, Dict[str, str]]:
        """Fetch category names from a WordPress site (cached, best-effort).

        On failure an empty mapping is cached so the export degrades to
        raw category ids instead of aborting.
        NOTE(review): only the first 100 categories are fetched — no
        pagination; confirm no site exceeds that.
        """
        if site_name in self.category_cache:
            return self.category_cache[site_name]
        logger.info(f" Fetching categories from {site_name}...")
        categories = {}
        base_url = site_config['url'].rstrip('/')
        api_url = f"{base_url}/wp-json/wp/v2/categories"
        auth = HTTPBasicAuth(site_config['username'], site_config['password'])
        try:
            response = requests.get(api_url, params={'per_page': 100}, auth=auth, timeout=10)
            response.raise_for_status()
            for cat in response.json():
                categories[cat['id']] = {'name': cat.get('name', ''), 'slug': cat.get('slug', '')}
            logger.info(f" ✓ Fetched {len(categories)} categories")
        except Exception as e:
            # Best-effort: category names are cosmetic in the export.
            logger.warning(f" Could not fetch categories from {site_name}: {e}")
        self.category_cache[site_name] = categories
        return categories

    def fetch_posts_from_site(self, site_name: str, site_config: Dict) -> List[Dict]:
        """Fetch all posts (published and draft) from a WordPress site.

        Pages through the REST API 100 posts at a time; the API answers
        HTTP 400 once we request past the last page, which ends the loop
        for the current status.
        """
        logger.info(f"\nFetching posts from {site_name}...")
        posts = []
        base_url = site_config['url'].rstrip('/')
        api_url = f"{base_url}/wp-json/wp/v2/posts"
        auth = HTTPBasicAuth(site_config['username'], site_config['password'])
        for status in ['publish', 'draft']:
            page = 1
            status_count = 0
            while True:
                try:
                    logger.info(f" Fetching page {page} ({status} posts)...")
                    response = requests.get(
                        api_url,
                        params={'page': page, 'per_page': 100, 'status': status},
                        auth=auth,
                        timeout=10
                    )
                    response.raise_for_status()
                    page_posts = response.json()
                    if not page_posts:
                        break
                    posts.extend(page_posts)
                    status_count += len(page_posts)
                    logger.info(f" ✓ Got {len(page_posts)} posts (total: {len(posts)})")
                    page += 1
                    time.sleep(0.5)  # throttle to be polite to the API
                except requests.exceptions.HTTPError as e:
                    # Read the status from the response attached to the
                    # exception (not the local variable) and guard against
                    # it being absent.
                    code = e.response.status_code if e.response is not None else None
                    if code == 400:
                        # Normal end-of-pagination signal from the WP API.
                        logger.info(f" API limit reached (got {status_count} {status} posts)")
                        break
                    else:
                        logger.error(f"Error on page {page}: {e}")
                        break
                except requests.exceptions.RequestException as e:
                    logger.error(f"Error fetching from {site_name}: {e}")
                    break
        logger.info(f"✓ Total posts from {site_name}: {len(posts)}\n")
        return posts

    def extract_post_details(self, post: Dict, site_name: str, category_map: Dict) -> Dict:
        """Flatten a raw WP post object into a CSV-ready row.

        Strips HTML from content/excerpt, resolves category ids through
        category_map (falling back to the raw id), and pulls SEO fields
        from Rank Math / Yoast meta when exposed by the API.
        """
        title = post.get('title', {})
        if isinstance(title, dict):
            title = title.get('rendered', '')
        content = post.get('content', {})
        if isinstance(content, dict):
            content = content.get('rendered', '')
        # Strip tags once, then derive both the truncated preview and
        # the word count; counting on the full text (not the 500-char
        # preview) keeps word_count accurate for long posts.
        full_text = re.sub('<[^<]+?>', '', content)
        content_text = full_text[:500]
        excerpt = post.get('excerpt', {})
        if isinstance(excerpt, dict):
            excerpt = excerpt.get('rendered', '')
        excerpt_text = re.sub('<[^<]+?>', '', excerpt)
        meta_dict = post.get('meta', {}) if isinstance(post.get('meta'), dict) else {}
        # Prefer Rank Math, fall back to Yoast.
        meta_description = (
            meta_dict.get('rank_math_description', '') or
            meta_dict.get('_yoast_wpseo_metadesc', '') or ''
        )
        category_ids = post.get('categories', [])
        category_names = ', '.join([
            category_map.get(cat_id, {}).get('name', str(cat_id))
            for cat_id in category_ids
        ]) if category_ids else ''
        return {
            'site': site_name,
            'post_id': post['id'],
            'status': post.get('status', 'publish'),
            'title': title.strip(),
            'slug': post.get('slug', ''),
            'url': post.get('link', ''),
            'author_id': post.get('author', ''),
            'date_published': post.get('date', ''),
            'date_modified': post.get('modified', ''),
            'categories': category_names,
            'tags': ', '.join([str(t) for t in post.get('tags', [])]),
            'excerpt': excerpt_text.strip(),
            'content_preview': content_text.strip(),
            'seo_title': meta_dict.get('rank_math_title', ''),
            'meta_description': meta_description,
            'focus_keyword': meta_dict.get('rank_math_focus_keyword', ''),
            'word_count': len(full_text.split()),
        }

    def export_to_csv(self, output_file: Optional[str] = None) -> Optional[str]:
        """Write self.all_posts to CSV.

        Args:
            output_file: Target path; defaults to
                output/all_posts_<YYYY-MM-DD>.csv at the project root.

        Returns:
            The path written, as a string, or None when there is
            nothing to export.
        """
        if not output_file:
            output_dir = Path(__file__).parent.parent.parent / 'output'
            output_dir.mkdir(parents=True, exist_ok=True)
            date_str = datetime.now().strftime('%Y-%m-%d')
            output_file = output_dir / f'all_posts_{date_str}.csv'
        output_file = Path(output_file)
        output_file.parent.mkdir(parents=True, exist_ok=True)
        if not self.all_posts:
            logger.error("No posts to export")
            return None
        # Column order of the CSV; must match the keys produced by
        # extract_post_details().
        fieldnames = [
            'site', 'post_id', 'status', 'title', 'slug', 'url', 'author_id',
            'date_published', 'date_modified', 'categories', 'tags', 'excerpt',
            'content_preview', 'seo_title', 'meta_description', 'focus_keyword', 'word_count',
        ]
        logger.info(f"Exporting {len(self.all_posts)} posts to CSV...")
        with open(output_file, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(self.all_posts)
        logger.info(f"✓ CSV exported to: {output_file}")
        return str(output_file)

    def run(self):
        """Run the complete export: fetch, flatten, write CSV, summarize."""
        logger.info("="*70)
        logger.info("EXPORTING ALL POSTS FOR AI DECISION MAKING")
        logger.info("="*70)
        logger.info("Sites configured: " + ", ".join(self.sites.keys()))
        for site_name, site_config in self.sites.items():
            categories = self.fetch_category_names(site_name, site_config)
            posts = self.fetch_posts_from_site(site_name, site_config)
            for post in posts:
                self.all_posts.append(self.extract_post_details(post, site_name, categories))
        if not self.all_posts:
            logger.error("No posts found on any site")
            return
        # Deterministic ordering makes runs diff-able.
        self.all_posts.sort(key=lambda x: (x['site'], x['post_id']))
        self.export_to_csv()
        # Print per-site summary of published vs draft counts.
        logger.info("\n" + "="*70)
        logger.info("EXPORT SUMMARY")
        logger.info("="*70)
        by_site = {}
        for post in self.all_posts:
            site = post['site']
            if site not in by_site:
                by_site[site] = {'total': 0, 'published': 0, 'draft': 0}
            by_site[site]['total'] += 1
            if post['status'] == 'publish':
                by_site[site]['published'] += 1
            else:
                by_site[site]['draft'] += 1
        for site, stats in sorted(by_site.items()):
            logger.info(f"\n{site}:")
            logger.info(f" Total: {stats['total']}")
            logger.info(f" Published: {stats['published']}")
            logger.info(f" Drafts: {stats['draft']}")
        logger.info("\n✓ Export complete!")

14
src/seo/recategorizer.py Normal file
View File

@@ -0,0 +1,14 @@
"""
Recategorizer Module - AI-powered post recategorization
"""
import sys
from pathlib import Path
# Import from scripts directory
scripts_dir = Path(__file__).parent.parent.parent / 'scripts'
sys.path.insert(0, str(scripts_dir))
from ai_recategorize_posts import PostRecategorizer
__all__ = ['PostRecategorizer']

14
src/seo/seo_checker.py Normal file
View File

@@ -0,0 +1,14 @@
"""
SEO Checker Module - SEO quality analysis
"""
import sys
from pathlib import Path
# Import from scripts directory
scripts_dir = Path(__file__).parent.parent.parent / 'scripts'
sys.path.insert(0, str(scripts_dir))
from multi_site_seo_analyzer import MultiSiteSEOAnalyzer
__all__ = ['MultiSiteSEOAnalyzer']