Commit 9bf19c4 · Parent(s): 795900a
ok

Files changed:
- app.py +28 -30
- llm_recommendations.py +90 -67
- modules/backlinks.py +11 -16
- modules/content_audit.py +0 -11
- modules/keywords.py +3 -19
- modules/technical_seo.py +2 -14
- report_generator.py +99 -54
- simple_pdf_generator.py +16 -26
app.py
CHANGED
@@ -1,4 +1,4 @@
-
+
 from flask import Flask, render_template, request, jsonify, send_file, redirect, url_for
 import validators
 import os
@@ -7,7 +7,7 @@ import uuid
 from urllib.parse import urlparse
 from typing import Dict, Any
 
-
+
 from modules.technical_seo import TechnicalSEOModule
 from modules.content_audit import ContentAuditModule
 from modules.keywords import KeywordsModule
@@ -19,7 +19,7 @@ from llm_recommendations import LLMRecommendations
 app = Flask(__name__, static_folder='static')
 app.secret_key = 'seo_report_generator_2024'
 
-
+
 technical_module = TechnicalSEOModule()
 content_module = ContentAuditModule()
 keywords_module = KeywordsModule()
@@ -28,11 +28,10 @@ report_gen = ReportGenerator()
 pdf_gen = SimplePDFGenerator()
 llm_recommendations = LLMRecommendations()
 
-
+
 reports_store = {}
 
 def _transform_keywords_data(new_data: Dict[str, Any]) -> Dict[str, Any]:
-    """Transform new keywords data structure to match report generator expectations"""
     if not new_data or new_data.get('placeholder'):
         return {
             'placeholder': True,
@@ -44,7 +43,7 @@ def _transform_keywords_data(new_data: Dict[str, Any]) -> Dict[str, Any]:
             'data_source': 'Analysis failed'
         }
 
-
+
     totals = new_data.get('totals', {})
     distribution = new_data.get('distribution', {})
     movement = new_data.get('movement', {})
@@ -53,7 +52,7 @@ def _transform_keywords_data(new_data: Dict[str, Any]) -> Dict[str, Any]:
    opportunities = new_data.get('opportunities', [])
    data_sources = new_data.get('data_sources', {})
 
-
+
    pos_dist = {
        'top_3': distribution.get('top3', 0),
        'top_10': distribution.get('top10', 0),
@@ -61,27 +60,27 @@ def _transform_keywords_data(new_data: Dict[str, Any]) -> Dict[str, Any]:
        'beyond_50': totals.get('keywords', 0) - distribution.get('top50', 0)
    }
 
-
+
    transformed_best_keywords = []
    for kw in best_keywords:
        transformed_best_keywords.append({
            'keyword': kw.get('keyword', ''),
            'position': kw.get('rank', 0),
-            'clicks': 0,
+            'clicks': 0,
            'impressions': kw.get('volume', 0),
            'url': kw.get('url', ''),
            'estimated_traffic': kw.get('estimated_traffic', 0),
            'trend': kw.get('trend', 'stable')
        })
 
-
+
    transformed_opportunities = []
    for opp in opportunities:
        transformed_opportunities.append({
            'keyword': opp.get('keyword', ''),
-            'position': 0,
+            'position': 0,
            'impressions': opp.get('volume', 0),
-            'ctr': 0,
+            'ctr': 0,
            'competitor_rank': opp.get('competitor_rank', 0),
            'priority_score': opp.get('priority_score', 0),
            'competitor_domain': opp.get('competitor_domain', '')
@@ -119,30 +118,30 @@ def generate_report():
    if not validators.url(url):
        return jsonify({'error': 'Please enter a valid URL'}), 400
 
-
+
    report_id = str(uuid.uuid4())
 
-
+
    competitor_domains = []
    competitor_list = []
    for comp in competitors:
        comp = comp.strip()
        if comp and validators.url(comp):
            competitor_list.append(comp)
-
+
            domain = urlparse(comp).netloc.replace('www.', '')
            competitor_domains.append(domain)
 
-
+
    technical_data = technical_module.analyze(url)
 
-
+
    content_data = content_module.analyze(url)
 
-
+
    keywords_result = keywords_module.analyze(url, competitor_domains=competitor_domains)
    if not keywords_result.success:
-
+
        keywords_data = {
            'placeholder': True,
            'message': f'Keywords analysis failed: {keywords_result.error}',
@@ -153,10 +152,10 @@ def generate_report():
            'data_source': 'Analysis failed'
        }
    else:
-
+
        keywords_data = _transform_keywords_data(keywords_result.data)
 
-
+
    print(f"DEBUG: Starting backlinks analysis for {url}")
    backlinks_result = backlinks_module.analyze(url)
    backlinks_data = backlinks_result.data
@@ -167,18 +166,18 @@ def generate_report():
    if backlinks_data.get('placeholder'):
        print(f"DEBUG: Using placeholder data: {backlinks_data.get('message')}")
 
-
+
    llm_rec_data = llm_recommendations.generate_recommendations(
        url, technical_data, content_data, keywords_data, backlinks_data
    )
 
-
+
    competitor_data = []
    for comp_url in competitor_list:
        comp_technical = technical_module.analyze(comp_url)
        comp_content = content_module.analyze(comp_url, quick_scan=True)
 
-
+
        comp_keywords_result = keywords_module.analyze(comp_url, competitor_domains=[], quick_scan=True)
        if comp_keywords_result.success:
            comp_keywords = _transform_keywords_data(comp_keywords_result.data)
@@ -193,7 +192,7 @@ def generate_report():
                'data_source': 'Analysis failed'
            }
 
-
+
        comp_backlinks_result = backlinks_module.analyze(comp_url, quick_scan=True)
        comp_backlinks = comp_backlinks_result.data
 
@@ -205,7 +204,7 @@ def generate_report():
            'backlinks': comp_backlinks
        })
 
-
+
    report_html = report_gen.generate_html_report(
        url=url,
        technical_data=technical_data,
@@ -217,7 +216,7 @@ def generate_report():
        include_charts=True
    )
 
-
+
    reports_store[report_id] = {
        'url': url,
        'html': report_html,
@@ -256,7 +255,6 @@ def download_html(report_id):
 
    report_data = reports_store[report_id]
 
-    # Create temporary file
    with tempfile.NamedTemporaryFile(mode='w', suffix='.html', delete=False) as f:
        f.write(report_data['html'])
        temp_path = f.name
@@ -273,10 +271,10 @@ def download_pdf(report_id):
    try:
        report_data = reports_store[report_id]
 
-
+
        pdf_data = pdf_gen.generate_pdf(report_data['html'])
 
-
+
        with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as f:
            f.write(pdf_data)
            temp_path = f.name
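
For reference, a minimal sketch (not part of the commit) of the mapping _transform_keywords_data performs; the field names come from the diff above, while the sample values are invented for illustration:

# Illustration only: sample input shaped like the new keywords module output.
sample = {
    'totals': {'keywords': 120},
    'distribution': {'top3': 5, 'top10': 20, 'top50': 80},
    'best_keywords': [{'keyword': 'seo report', 'rank': 2, 'volume': 900,
                       'url': 'https://example.com/', 'estimated_traffic': 150, 'trend': 'up'}],
}

# The transform maps 'rank' -> 'position' and 'volume' -> 'impressions', and fills
# clicks/ctr with zeros because the new data source does not provide them.
pos_dist = {
    'top_3': sample['distribution'].get('top3', 0),
    'top_10': sample['distribution'].get('top10', 0),
    'beyond_50': sample['totals'].get('keywords', 0) - sample['distribution'].get('top50', 0),
}
print(pos_dist)  # {'top_3': 5, 'top_10': 20, 'beyond_50': 40}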
llm_recommendations.py
CHANGED
@@ -1,7 +1,4 @@
-"""
-Groq LLM Integration for Smart SEO Recommendations
-Analyzes all 4 modules (Technical SEO, Content Audit, Keywords, Backlinks) to generate intelligent recommendations
-"""
+
 
 import os
 import json
@@ -9,7 +6,7 @@ from typing import Dict, Any, List
 from groq import Groq
 from dotenv import load_dotenv
 
-
+
 load_dotenv()
 
 
@@ -25,33 +22,20 @@ class LLMRecommendations:
     def generate_recommendations(self, url: str, technical_data: Dict[str, Any],
                                  content_data: Dict[str, Any], keywords_data: Dict[str, Any],
                                  backlinks_data: Dict[str, Any]) -> Dict[str, Any]:
-        """
-        Generate comprehensive SEO recommendations based on all module data
-
-        Args:
-            url: Target website URL
-            technical_data: Technical SEO analysis results
-            content_data: Content audit results
-            keywords_data: Keywords analysis results
-            backlinks_data: Backlinks analysis results
-
-        Returns:
-            Dictionary with recommendations and insights
-        """
        if not self.available:
            return self._generate_fallback_recommendations(technical_data, content_data, keywords_data, backlinks_data)
 
        try:
-
+
            context = self._prepare_context(url, technical_data, content_data, keywords_data, backlinks_data)
 
-
+
            recommendations = self._query_llm(context)
 
            return {
-                '
+                'recommendations_markdown': recommendations,
                'executive_insights': self._generate_executive_insights(context),
-                'priority_actions': self._extract_priority_actions(recommendations),
+                'priority_actions': self._extract_priority_actions([recommendations]),
                'data_source': 'Groq LLM Analysis',
                'generated_at': context['analysis_date']
            }
@@ -61,9 +45,8 @@ class LLMRecommendations:
 
    def _prepare_context(self, url: str, technical_data: Dict, content_data: Dict,
                         keywords_data: Dict, backlinks_data: Dict) -> Dict[str, Any]:
-        """Prepare structured context for LLM analysis"""
 
-
+
        context = {
            'website': url,
            'analysis_date': technical_data.get('last_updated', ''),
@@ -101,7 +84,6 @@ class LLMRecommendations:
        return context
 
    def _query_llm(self, context: Dict[str, Any]) -> List[str]:
-        """Query Groq LLM for SEO recommendations"""
 
        prompt = f"""
 You are an expert SEO consultant analyzing a comprehensive SEO audit for {context['website']}. Based on the data below, provide specific, actionable SEO recommendations.
@@ -143,12 +125,18 @@ CRITICAL INSTRUCTIONS:
 5. Prioritize recommendations by potential impact and ease of implementation
 6. Include technical optimizations, content improvements, keyword opportunities, and link building strategies
 7. Provide estimated timelines and resources needed for each recommendation
+8. IMPORTANT: Use ONLY plain text format with markdown syntax - NO tables, NO complex formatting, NO HTML
+9. Format your response as clean markdown that can be rendered properly
 
-Generate exactly 8-12 specific recommendations
-
+Generate exactly 8-12 specific recommendations using simple markdown format:
+## Priority: HIGH/MEDIUM/LOW
+**Action Title**
+Description with clear steps and expected impact.
+Timeline: X weeks
 
 Priority Levels: HIGH, MEDIUM, LOW
 Focus on actionable items that can be implemented within 30-90 days.
+Use simple markdown formatting only - headers, bold text, and bullet points.
 
 Response:
 """
@@ -158,35 +146,25 @@ Response:
                messages=[
                    {'role': 'user', 'content': prompt}
                ],
-                model="
+                model="openai/gpt-oss-120b",
                stream=False,
-                temperature=0.1,
+                temperature=0.1,
                max_tokens=1500
            )
 
            response = chat_completion.choices[0].message.content.strip()
 
-
-
-
-            for line in lines:
-                line = line.strip()
-                if line.startswith('- **') or line.startswith('•'):
-                    # Clean up the recommendation
-                    recommendation = line.replace('- **', '').replace('• **', '').strip()
-                    if recommendation:
-                        recommendations.append(recommendation)
-
-            return recommendations if recommendations else [response]
+
+            # Return the full markdown response instead of parsing individual recommendations
+            return response
 
        except Exception as e:
            return [f"LLM Error: {str(e)}"]
 
    def _generate_executive_insights(self, context: Dict[str, Any]) -> List[str]:
-        """Generate high-level executive insights"""
        insights = []
 
-
+
        mobile_score = context['technical_seo']['mobile_score']
        desktop_score = context['technical_seo']['desktop_score']
        avg_score = (mobile_score + desktop_score) / 2
@@ -198,7 +176,7 @@ Response:
        else:
            insights.append(f"🟢 Good: Website performance is solid (avg: {avg_score:.0f}/100)")
 
-
+
        pages = context['content_audit']['pages_analyzed']
        if pages > 0:
            metadata = context['content_audit']['metadata_completeness']
@@ -209,7 +187,7 @@ Response:
        else:
            insights.append(f"🟢 Content Quality: Metadata completeness is good ({title_pct:.0f}%)")
 
-
+
        if context['keywords']['data_available']:
            total_keywords = context['keywords']['total_keywords']
            pos_dist = context['keywords']['position_distribution']
@@ -224,7 +202,7 @@ Response:
        else:
            insights.append("📊 Connect keyword tracking tools for visibility insights")
 
-
+
        if context['backlinks']['data_available']:
            ref_domains = context['backlinks']['total_ref_domains']
            domain_rating = context['backlinks']['domain_rating']
@@ -241,22 +219,65 @@ Response:
        return insights
 
    def _extract_priority_actions(self, recommendations: List[str]) -> List[Dict[str, str]]:
-        """Extract priority actions from recommendations"""
        priority_actions = []
 
-
-
-
-
-
-
-
-
-
-
-
+        # Handle the case where recommendations is a single string (markdown)
+        if isinstance(recommendations, list) and len(recommendations) == 1:
+            markdown_text = recommendations[0]
+        elif isinstance(recommendations, str):
+            markdown_text = recommendations
+        else:
+            markdown_text = ""
+
+        # Extract high priority actions from markdown
+        if markdown_text:
+            lines = markdown_text.split('\n')
+            current_priority = None
+            current_title = None
+            current_description = []
+
+            for line in lines:
+                line = line.strip()
+                if line.startswith('## Priority:'):
+                    # Save previous action if exists
+                    if current_title and current_priority == 'HIGH':
+                        priority_actions.append({
+                            'title': current_title,
+                            'description': ' '.join(current_description).strip(),
+                            'priority': 'HIGH'
+                        })
+
+                    # Start new action
+                    current_priority = line.replace('## Priority:', '').strip()
+                    current_title = None
+                    current_description = []
+                elif line.startswith('**') and line.endswith('**'):
+                    current_title = line.replace('**', '').strip()
+                elif line and not line.startswith('#'):
+                    current_description.append(line)
+
+            # Save last action if exists
+            if current_title and current_priority == 'HIGH':
+                priority_actions.append({
+                    'title': current_title,
+                    'description': ' '.join(current_description).strip(),
+                    'priority': 'HIGH'
+                })
+
+        # Fallback for old format
+        if not priority_actions and isinstance(recommendations, list):
+            for rec in recommendations:
+                if '**HIGH**' in rec or '**CRITICAL**' in rec:
+                    parts = rec.replace('**HIGH**', '').replace('**CRITICAL**', '').strip()
+                    if ':' in parts:
+                        title, description = parts.split(':', 1)
+                        priority_actions.append({
+                            'title': title.strip(),
+                            'description': description.strip(),
+                            'priority': 'HIGH'
+                        })
 
-
+
        if not priority_actions and recommendations:
            for i, rec in enumerate(recommendations[:3]):
                if ':' in rec:
@@ -267,15 +288,14 @@ Response:
                        'priority': 'HIGH'
                    })
 
-        return priority_actions[:5]
+        return priority_actions[:5]
 
    def _generate_fallback_recommendations(self, technical_data: Dict, content_data: Dict,
                                           keywords_data: Dict, backlinks_data: Dict, error: str = None) -> Dict[str, Any]:
-        """Generate basic recommendations when LLM is not available"""
 
        recommendations = []
 
-
+
        mobile_score = technical_data.get('mobile_score', 0)
        desktop_score = technical_data.get('desktop_score', 0)
 
@@ -284,7 +304,7 @@ Response:
        if desktop_score < 50:
            recommendations.append("**HIGH** Improve Desktop Performance: Optimize server response time, minimize CSS and JavaScript")
 
-
+
        pages = content_data.get('pages_analyzed', 0)
        if pages > 0:
            metadata = content_data.get('metadata_completeness', {})
@@ -294,7 +314,7 @@ Response:
        if content_data.get('avg_word_count', 0) < 300:
            recommendations.append("**MEDIUM** Enhance Content: Increase average page content length")
 
-
+
        if not keywords_data.get('placeholder', False):
            total_keywords = keywords_data.get('total_keywords', 0)
            pos_dist = keywords_data.get('position_distribution', {})
@@ -304,7 +324,7 @@ Response:
        else:
            recommendations.append("**MEDIUM** Set Up Keyword Tracking: Connect Google Search Console for keyword insights")
 
-
+
        if not backlinks_data.get('placeholder', False):
            ref_domains = backlinks_data.get('total_ref_domains', 0)
            if ref_domains < 50:
@@ -312,7 +332,7 @@ Response:
        else:
            recommendations.append("**MEDIUM** Set Up Backlink Monitoring: Add RapidAPI key for comprehensive link analysis")
 
-
+
        if not recommendations:
            recommendations = [
                "**HIGH** Audit Technical Issues: Review site speed and mobile performance",
@@ -329,8 +349,11 @@ Response:
        if error:
            insights.append(f"❌ LLM Error: {error}")
 
+        # Convert recommendations list to markdown format
+        markdown_recommendations = "\n".join([f"## Priority: HIGH\n**{rec.replace('**HIGH**', '').replace('**MEDIUM**', '').replace('**LOW**', '').strip()}**\n" for rec in recommendations])
+
        return {
-            '
+            'recommendations_markdown': markdown_recommendations,
            'executive_insights': insights,
            'priority_actions': [
                {
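
To illustrate the new flow end to end, here is a standalone sketch of how a response in the `## Priority:` / `**Title**` format requested by the prompt reduces to priority actions; it mirrors the parsing added in _extract_priority_actions above, but is simplified, and the sample LLM response is invented:

sample_response = """## Priority: HIGH
**Improve Mobile Performance**
Compress images and defer non-critical JavaScript.
Timeline: 4 weeks

## Priority: MEDIUM
**Expand Thin Content**
Grow key landing pages past 300 words.
Timeline: 6 weeks
"""

def extract_high_priority(markdown_text):
    # Walk the markdown line by line, collecting title/description per priority block.
    actions, priority, title, description = [], None, None, []
    for line in markdown_text.split('\n'):
        line = line.strip()
        if line.startswith('## Priority:'):
            if title and priority == 'HIGH':
                actions.append({'title': title, 'description': ' '.join(description).strip(), 'priority': 'HIGH'})
            priority, title, description = line.replace('## Priority:', '').strip(), None, []
        elif line.startswith('**') and line.endswith('**'):
            title = line.replace('**', '').strip()
        elif line and not line.startswith('#'):
            description.append(line)
    if title and priority == 'HIGH':
        actions.append({'title': title, 'description': ' '.join(description).strip(), 'priority': 'HIGH'})
    return actions[:5]

print(extract_high_priority(sample_response))
# Only the HIGH block survives: one action titled 'Improve Mobile Performance'.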
modules/backlinks.py
CHANGED
@@ -73,14 +73,12 @@ class BacklinksModule:
         )
 
     def _extract_domain(self, url: str) -> str:
-        """Extract clean domain from URL"""
        if not url.startswith(('http://', 'https://')):
            url = 'https://' + url
        domain = urlparse(url).netloc.replace('www.', '')
        return domain
 
    def _api_request_with_retry(self, url: str, params: Dict = None, headers: Dict = None) -> Optional[Dict]:
-        """Make API request with retry logic"""
        if headers is None:
            headers = self.headers.copy()
 
@@ -90,8 +88,8 @@ class BacklinksModule:
 
                if response.status_code == 200:
                    return response.json()
-                elif response.status_code == 429:
-                    wait_time = (attempt + 1) * 2
+                elif response.status_code == 429:
+                    wait_time = (attempt + 1) * 2
                    print(f"Rate limited, waiting {wait_time}s...")
                    time.sleep(wait_time)
                    continue
@@ -124,7 +122,7 @@ class BacklinksModule:
            # Limit results for quick scan
            if quick_scan:
                return data[:50]
-            return data[:500]
+            return data[:500]
 
        except Exception as e:
            print(f"Individual backlinks API error: {str(e)}")
@@ -132,7 +130,6 @@ class BacklinksModule:
            return []
 
    def _get_majestic_metrics(self, domain: str) -> Dict[str, Any]:
-        """Get Majestic domain metrics via RapidAPI"""
        try:
            headers = self.headers.copy()
            headers['x-rapidapi-host'] = 'majestic1.p.rapidapi.com'
@@ -274,7 +271,7 @@ class BacklinksModule:
 
        # Sort by backlinks count and return top domains
        top_domains = sorted(domain_stats.values(), key=lambda x: x['backlinks'], reverse=True)
-        return top_domains[:20]
+        return top_domains[:20]
 
    def _extract_anchor_distribution(self, backlinks: List[Dict]) -> List[Dict[str, Any]]:
        """Analyze anchor text distribution"""
@@ -282,7 +279,7 @@ class BacklinksModule:
 
        for link in backlinks:
            anchor = link.get('anchor', '').strip()
-            if not anchor or len(anchor) > 100:
+            if not anchor or len(anchor) > 100:
                continue
 
            if anchor not in anchor_stats:
@@ -316,7 +313,7 @@ class BacklinksModule:
 
        # Sort by backlinks count
        anchor_distribution.sort(key=lambda x: x['backlinks'], reverse=True)
-        return anchor_distribution[:15]
+        return anchor_distribution[:15]
 
    def _calculate_monthly_changes(self, backlinks: List[Dict]) -> Dict[str, int]:
        """Calculate monthly backlinks changes"""
@@ -335,14 +332,14 @@ class BacklinksModule:
                link_date = datetime.strptime(first_seen, '%Y-%m-%d')
                if link_date >= last_month:
                    new_links += 1
-                if link_date >= now - timedelta(days=90):
+                if link_date >= now - timedelta(days=90):
                    recent_links += 1
            except Exception:
                continue
 
        return {
            'new_backlinks': new_links,
-            'lost_backlinks': 0,
+            'lost_backlinks': 0,
            'net_change': new_links,
            'recent_backlinks_3m': recent_links
        }
@@ -384,9 +381,9 @@ class BacklinksModule:
 
        # Quality score (0-100)
        quality_score = min(100, (
-            (follow_ratio * 0.4) +
-            (avg_authority * 2) +
-            (min(20, len(set(link.get('url_from', '').split('/')[2] for link in backlinks))) * 1)
+            (follow_ratio * 0.4) +
+            (avg_authority * 2) +
+            (min(20, len(set(link.get('url_from', '').split('/')[2] for link in backlinks))) * 1)
        ))
 
        return {
@@ -398,7 +395,6 @@ class BacklinksModule:
        }
 
    def _get_data_sources(self, individual_backlinks: List, majestic_metrics: Dict, domain_metrics: Dict) -> List[str]:
-        """Track which data sources provided information"""
        sources = []
 
        if individual_backlinks:
@@ -411,7 +407,6 @@ class BacklinksModule:
        return sources or ['No data sources available']
 
    def _generate_no_api_data(self, url: str) -> ModuleResult:
-        """Generate response when no API key is available"""
        domain = self._extract_domain(url)
 
        no_api_data = {
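
As background on the retry behaviour touched above (the elif response.status_code == 429 branch), a minimal standalone sketch of the same linear-backoff pattern; the endpoint and headers in the usage comment are placeholders, not the module's real configuration:

import time
import requests

def api_request_with_retry(url, params=None, headers=None, max_attempts=3):
    """Retry a GET request, backing off when the API answers 429 (rate limited)."""
    for attempt in range(max_attempts):
        response = requests.get(url, params=params, headers=headers, timeout=30)
        if response.status_code == 200:
            return response.json()
        elif response.status_code == 429:
            wait_time = (attempt + 1) * 2  # same linear backoff as in the diff: 2s, 4s, 6s
            print(f"Rate limited, waiting {wait_time}s...")
            time.sleep(wait_time)
            continue
        else:
            break  # other status codes are not retried in this sketch
    return None

# Hypothetical usage; the real module targets RapidAPI backlink endpoints.
# data = api_request_with_retry("https://majestic1.p.rapidapi.com/url_metrics",
#                               headers={"x-rapidapi-key": "..."})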
modules/content_audit.py
CHANGED
@@ -59,7 +59,6 @@ class ContentAuditModule:
             return self._get_fallback_data(url, str(e))
 
     def _get_sitemap_urls(self, base_url: str, limit: int = 200) -> List[str]:
-        """Extract URLs from sitemap.xml"""
        urls = []
 
        # Common sitemap locations
@@ -81,7 +80,6 @@ class ContentAuditModule:
        return urls[:limit]
 
    def _parse_sitemap(self, sitemap_content: bytes, base_url: str, limit: int) -> List[str]:
-        """Parse sitemap XML content"""
        urls = []
 
        try:
@@ -117,7 +115,6 @@ class ContentAuditModule:
        return urls[:limit]
 
    def _crawl_from_homepage(self, base_url: str, limit: int = 50) -> List[str]:
-        """Crawl URLs starting from homepage"""
        urls = set([base_url])
        processed = set()
 
@@ -143,7 +140,6 @@ class ContentAuditModule:
        return list(urls)[:limit]
 
    def _analyze_page(self, url: str) -> Dict[str, Any]:
-        """Analyze a single page"""
        try:
            response = self.session.get(url, timeout=15)
            if response.status_code != 200:
@@ -208,7 +204,6 @@ class ContentAuditModule:
        return soup.get_text()
 
    def _detect_cta(self, soup: BeautifulSoup) -> bool:
-        """Detect presence of call-to-action elements"""
        text_content = soup.get_text().lower()
 
        for keyword in self.cta_keywords:
@@ -225,7 +220,6 @@ class ContentAuditModule:
        return False
 
    def _get_last_modified(self, headers: Dict, soup: BeautifulSoup) -> str:
-        """Get last modified date from headers or meta tags"""
        # Check headers first
        if 'last-modified' in headers:
            return headers['last-modified']
@@ -240,7 +234,6 @@ class ContentAuditModule:
        return ""
 
    def _is_valid_content_url(self, url: str) -> bool:
-        """Check if URL is valid for content analysis"""
        if not url:
            return False
 
@@ -261,7 +254,6 @@ class ContentAuditModule:
        return True
 
    def _is_same_domain(self, url1: str, url2: str) -> bool:
-        """Check if two URLs are from the same domain"""
        try:
            domain1 = urlparse(url1).netloc
            domain2 = urlparse(url2).netloc
@@ -270,7 +262,6 @@ class ContentAuditModule:
        return False
 
    def _calculate_metrics(self, base_url: str, pages_data: List[Dict], quick_scan: bool) -> Dict[str, Any]:
-        """Calculate aggregate metrics from page data"""
        total_pages = len(pages_data)
        valid_pages = [p for p in pages_data if 'error' not in p]
 
@@ -318,7 +309,6 @@ class ContentAuditModule:
        }
 
    def _analyze_content_freshness(self, pages_data: List[Dict]) -> Dict[str, Any]:
-        """Analyze content freshness based on last modified dates"""
        now = datetime.now()
        six_months_ago = now - timedelta(days=180)
        eighteen_months_ago = now - timedelta(days=540)
@@ -361,7 +351,6 @@ class ContentAuditModule:
        }
 
    def _get_fallback_data(self, url: str, error: str) -> Dict[str, Any]:
-        """Return fallback data when analysis fails"""
        return {
            'url': url,
            'error': f"Content audit failed: {error}",
modules/keywords.py
CHANGED
@@ -118,13 +118,11 @@ class KeywordsModule:
         )
 
     def _extract_domain(self, url: str) -> str:
-        """Extract domain from URL"""
        if not url.startswith(('http://', 'https://')):
            url = 'https://' + url
        return urlparse(url).netloc.replace('www.', '')
 
    def _fetch_domain_keywords(self, domain: str, quick_scan: bool) -> Dict[str, Any]:
-        """Fetch keywords data for a domain using Competitors Ranking Keywords API"""
        try:
            all_keywords = []
            offset = 0
@@ -187,7 +185,6 @@ class KeywordsModule:
            return {'success': False, 'error': str(e)}
 
    def _calculate_domain_statistics(self, keywords: List[Dict]) -> Dict[str, Any]:
-        """Calculate domain statistics from keywords data"""
        total_keywords = len(keywords)
 
        # Position distribution
@@ -221,7 +218,6 @@ class KeywordsModule:
 
    def _process_keywords_data(self, main_data: Dict, competitor_data: Dict,
                               domain: str, competitor_domains: List[str]) -> Dict[str, Any]:
-        """Process and structure the keywords data"""
        stats = main_data['statistics']['organic']
        keywords = main_data['keywords']
 
@@ -288,7 +284,6 @@ class KeywordsModule:
        }
 
    def _identify_best_keywords(self, keywords: List[Dict]) -> List[Dict]:
-        """Identify best performing keywords"""
        best_candidates = [
            k for k in keywords
            if k.get('rank', 100) <= 3 and k.get('estimated_traffic_volume', 0) > 10
@@ -310,7 +305,6 @@ class KeywordsModule:
        ]
 
    def _identify_declining_keywords(self, keywords: List[Dict]) -> List[Dict]:
-        """Identify keywords with declining performance"""
        declining_candidates = []
 
        for k in keywords:
@@ -333,7 +327,6 @@ class KeywordsModule:
 
    def _analyze_competitor_gaps(self, main_keywords: List[Dict], competitor_data: Dict,
                                 domain: str, competitor_domains: List[str]) -> Tuple[List[Dict], List[Dict]]:
-        """Analyze competitor gaps and opportunities"""
        opportunities = []
        competitor_summary = []
 
@@ -385,10 +378,9 @@ class KeywordsModule:
        # Sort all opportunities by priority score
        opportunities.sort(key=lambda x: x['priority_score'], reverse=True)
 
-        return opportunities[:50], competitor_summary
+        return opportunities[:50], competitor_summary
 
    def _calculate_opportunity_score(self, competitor_rank: int, search_volume: int, difficulty: int) -> float:
-        """Calculate opportunity score using the PRD algorithm"""
        position_ctr = {1: 28, 2: 15, 3: 11, 4: 8, 5: 7, 10: 2, 20: 1}
 
        # Find closest CTR value
@@ -406,7 +398,6 @@ class KeywordsModule:
        return min(round(score, 1), 100)
 
    def _estimate_difficulty(self, rank: int, volume: int) -> int:
-        """Estimate keyword difficulty based on rank and volume"""
        # Simple heuristic - in practice, this would come from a keyword difficulty API
        if rank <= 3:
            return 20 + (volume // 1000) * 5
@@ -416,7 +407,6 @@ class KeywordsModule:
        return 50 + (volume // 1000) * 2
 
    def _enrich_keywords_data(self, keywords: List[Dict]) -> List[Dict]:
-        """Enrich keywords with volume and CPC data"""
        # Identify keywords needing enrichment
        keywords_to_enrich = [
            k for k in keywords
@@ -445,7 +435,6 @@ class KeywordsModule:
        return enriched_keywords
 
    def _batch_enrich_keywords(self, keywords: List[str]) -> Dict[str, Dict]:
-        """Batch enrich keywords using Google Keyword Insight API"""
        enriched_data = {}
 
        # Process in batches
@@ -518,17 +507,14 @@ class KeywordsModule:
        return enriched_data
 
    def _get_cache_key(self, keyword: str) -> str:
-        """Generate cache key for keyword"""
        return hashlib.md5(keyword.lower().encode()).hexdigest()
 
    def _calculate_enrichment_rate(self, keywords: List[Dict]) -> float:
-        """Calculate the percentage of keywords with volume data"""
        enriched = sum(1 for k in keywords if k.get('avg_search_volume', 0) > 0)
        total = len(keywords)
        return round(enriched / total * 100, 1) if total > 0 else 0
 
    def _determine_trend(self, keyword_data: Dict) -> str:
-        """Determine keyword trend based on rank changes"""
        current_rank = keyword_data.get('rank', 100)
        previous_rank = keyword_data.get('previous_rank', 100)
 
@@ -542,13 +528,11 @@ class KeywordsModule:
        return 'stable'
 
    def _rate_limit_primary_api(self):
-        """Rate limiting for primary API (60 requests/minute)"""
        current_time = time.time()
-        if current_time - self.last_primary_call < 1:
+        if current_time - self.last_primary_call < 1:
            time.sleep(1)
 
    def _rate_limit_enrichment_api(self):
-        """Rate limiting for enrichment API (100 requests/minute)"""
        current_time = time.time()
-        if current_time - self.last_enrichment_call < 0.6:
+        if current_time - self.last_enrichment_call < 0.6:
            time.sleep(0.6)
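
For completeness, the rate-limiting helpers touched above boil down to a "sleep if the last call was too recent" guard. A small standalone sketch follows; the one-second and 0.6-second gaps come straight from the diff, the class wrapper and names are illustrative:

import time

class RateLimiter:
    """Sleep long enough to keep calls at or below a rough per-minute budget."""
    def __init__(self, min_gap_seconds):
        self.min_gap = min_gap_seconds  # e.g. 1.0 ~ 60 req/min, 0.6 ~ 100 req/min
        self.last_call = 0.0

    def wait(self):
        now = time.time()
        if now - self.last_call < self.min_gap:
            time.sleep(self.min_gap)  # fixed pause, mirroring the module's behaviour
        self.last_call = time.time()

primary_api = RateLimiter(1.0)     # mirrors _rate_limit_primary_api
enrichment_api = RateLimiter(0.6)  # mirrors _rate_limit_enrichment_api
primary_api.wait()  # call before each API request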
modules/technical_seo.py
CHANGED

@@ -4,12 +4,6 @@ from typing import Dict, Any, Optional
 
 class TechnicalSEOModule:
     def __init__(self, api_key: Optional[str] = None):
-        """
-        Initialize Technical SEO module
-
-        Args:
-            api_key: Google PageSpeed Insights API key (optional for basic usage)
-        """
         self.api_key = api_key
         self.base_url = "https://www.googleapis.com/pagespeedonline/v5/runPagespeed"
 
@@ -45,7 +39,6 @@ class TechnicalSEOModule:
         return self._get_fallback_data(url, str(e))
 
     def _get_pagespeed_data(self, url: str, strategy: str) -> Dict[str, Any]:
-        """Get PageSpeed Insights data for URL and strategy"""
         params = {
             'url': url,
             'strategy': strategy,
@@ -64,7 +57,6 @@
         raise
 
     def _extract_metrics(self, data: Dict[str, Any], strategy: str) -> Dict[str, Any]:
-        """Extract key performance metrics from PageSpeed data"""
         lighthouse_result = data.get('lighthouseResult', {})
         categories = lighthouse_result.get('categories', {})
         audits = lighthouse_result.get('audits', {})
@@ -91,7 +83,6 @@
         }
 
     def _extract_core_web_vitals(self, mobile_data: Dict[str, Any], desktop_data: Dict[str, Any]) -> Dict[str, Any]:
-        """Extract Core Web Vitals metrics"""
         def get_metric_value(data, metric_key):
             audits = data.get('lighthouseResult', {}).get('audits', {})
             metric = audits.get(metric_key, {})
@@ -116,7 +107,6 @@
         }
 
     def _extract_opportunities(self, mobile_data: Dict[str, Any], desktop_data: Dict[str, Any]) -> Dict[str, Any]:
-        """Extract optimization opportunities"""
         mobile_audits = mobile_data.get('lighthouseResult', {}).get('audits', {})
 
         opportunities = []
@@ -128,7 +118,7 @@
 
         for key in opportunity_keys:
             audit = mobile_audits.get(key, {})
-            if audit.get('score', 1) < 0.9:
+            if audit.get('score', 1) < 0.9:
                 opportunities.append({
                     'id': key,
                     'title': audit.get('title', key.replace('-', ' ').title()),
@@ -137,10 +127,9 @@
                     'potential_savings': audit.get('details', {}).get('overallSavingsMs', 0)
                 })
 
-        return {'opportunities': opportunities[:5]}
+        return {'opportunities': opportunities[:5]}
 
     def _extract_diagnostics(self, mobile_data: Dict[str, Any], desktop_data: Dict[str, Any]) -> Dict[str, Any]:
-        """Extract diagnostic information"""
         mobile_audits = mobile_data.get('lighthouseResult', {}).get('audits', {})
 
         diagnostics = []
@@ -162,7 +151,6 @@
         return {'diagnostics': diagnostics}
 
     def _get_fallback_data(self, url: str, error: str) -> Dict[str, Any]:
-        """Return fallback data when API fails"""
         return {
             'url': url,
             'error': f"PageSpeed API unavailable: {error}",
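For context on the `_extract_opportunities` hunk above: audits scoring below 0.9 are kept as opportunities and capped at five. A self-contained sketch of that filtering logic follows; the function name and the sample audit payload are fabricated for illustration, and only the threshold, cap, and field names mirror the diff.

```python
from typing import Any, Dict, List


def pick_opportunities(audits: Dict[str, Dict[str, Any]],
                       keys: List[str],
                       score_threshold: float = 0.9,
                       limit: int = 5) -> List[Dict[str, Any]]:
    """Keep audits scoring below the threshold, capped at `limit` entries."""
    opportunities = []
    for key in keys:
        audit = audits.get(key, {})
        # Missing scores default to 1 (passing), so they are never flagged.
        if audit.get("score", 1) < score_threshold:
            opportunities.append({
                "id": key,
                "title": audit.get("title", key.replace("-", " ").title()),
                "potential_savings": audit.get("details", {}).get("overallSavingsMs", 0),
            })
    return opportunities[:limit]


# Example with a fabricated payload shaped like Lighthouse audit output:
sample_audits = {
    "render-blocking-resources": {
        "score": 0.4,
        "title": "Eliminate render-blocking resources",
        "details": {"overallSavingsMs": 780},
    },
    "uses-optimized-images": {"score": 1.0, "title": "Efficiently encode images", "details": {}},
}
print(pick_opportunities(sample_audits, list(sample_audits)))
```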
report_generator.py
CHANGED

@@ -5,11 +5,45 @@ import plotly.graph_objects as go
 import plotly.express as px
 from plotly.offline import plot
 import plotly
+import re
 
 class ReportGenerator:
     def __init__(self):
         self.report_template = self._get_report_template()
 
+    def _markdown_to_html(self, markdown_text: str) -> str:
+        """Convert simple markdown to HTML"""
+        if not markdown_text:
+            return ""
+
+        html = markdown_text
+
+        # Convert headers
+        html = re.sub(r'^### (.*?)$', r'<h3>\1</h3>', html, flags=re.MULTILINE)
+        html = re.sub(r'^## (.*?)$', r'<h2>\1</h2>', html, flags=re.MULTILINE)
+        html = re.sub(r'^# (.*?)$', r'<h1>\1</h1>', html, flags=re.MULTILINE)
+
+        # Convert bold text
+        html = re.sub(r'\*\*(.*?)\*\*', r'<strong>\1</strong>', html)
+
+        # Convert bullet points
+        html = re.sub(r'^- (.*?)$', r'<li>\1</li>', html, flags=re.MULTILINE)
+        html = re.sub(r'^• (.*?)$', r'<li>\1</li>', html, flags=re.MULTILINE)
+
+        # Wrap consecutive <li> tags in <ul>
+        html = re.sub(r'(<li>.*?</li>(?:\s*<li>.*?</li>)*)', r'<ul>\1</ul>', html, flags=re.DOTALL)
+
+        # Convert line breaks to <br> tags
+        html = html.replace('\n', '<br>')
+
+        # Clean up extra <br> tags around block elements
+        html = re.sub(r'<br>\s*(<h[1-6]>)', r'\1', html)
+        html = re.sub(r'(</h[1-6]>)\s*<br>', r'\1', html)
+        html = re.sub(r'<br>\s*(<ul>)', r'\1', html)
+        html = re.sub(r'(</ul>)\s*<br>', r'\1', html)
+
+        return html
+
     def generate_html_report(self, url: str, technical_data: Dict[str, Any],
                              content_data: Dict[str, Any], competitor_data: List[Dict] = None,
                              keywords_data: Dict[str, Any] = None, backlinks_data: Dict[str, Any] = None,
@@ -44,8 +78,7 @@
         if competitor_data:
             competitor_section = self._generate_competitor_section(competitor_data, technical_data, content_data)
 
-
-        placeholder_sections = self._generate_placeholder_sections()
+
 
         # Generate recommendations
         recommendations = self._generate_recommendations(technical_data, content_data)
@@ -61,7 +94,7 @@
             keywords_section=keywords_section,
             backlinks_section=backlinks_section,
             competitor_section=competitor_section,
-
+
             recommendations=recommendations,
             llm_recommendations=recommendations_section
         )
@@ -538,50 +571,7 @@
 
         return comparison_html
 
-
-        """Generate placeholder sections for future modules"""
-        return """
-        <div class="placeholder-sections">
-            <div class="placeholder-section">
-                <h3>🔍 Keyword Rankings</h3>
-                <div class="placeholder-content">
-                    <p><em>Coming in future versions</em></p>
-                    <ul>
-                        <li>Google Search Console integration</li>
-                        <li>Keyword ranking positions</li>
-                        <li>Search volume analysis</li>
-                        <li>Keyword opportunities</li>
-                    </ul>
-                </div>
-            </div>
-
-            <div class="placeholder-section">
-                <h3>🔗 Backlink Profile</h3>
-                <div class="placeholder-content">
-                    <p><em>Coming in future versions</em></p>
-                    <ul>
-                        <li>Total backlinks and referring domains</li>
-                        <li>Domain authority metrics</li>
-                        <li>Anchor text analysis</li>
-                        <li>Link acquisition opportunities</li>
-                    </ul>
-                </div>
-            </div>
-
-            <div class="placeholder-section">
-                <h3>📈 Conversion Tracking</h3>
-                <div class="placeholder-content">
-                    <p><em>Coming in future versions</em></p>
-                    <ul>
-                        <li>Google Analytics integration</li>
-                        <li>Organic traffic conversion rates</li>
-                        <li>Goal completion tracking</li>
-                        <li>Revenue attribution</li>
-                    </ul>
-                </div>
-            </div>
-        </div>
-        """
+
 
     def _generate_recommendations(self, technical_data: Dict[str, Any], content_data: Dict[str, Any]) -> str:
         """Generate prioritized recommendations"""
@@ -830,11 +820,11 @@
         """
 
     def _generate_recommendations_section(self, llm_recommendations: Dict[str, Any]) -> str:
-        """Generate LLM-powered recommendations section"""
+        """Generate LLM-powered recommendations section with markdown rendering"""
         if not llm_recommendations:
             return ""
 
-
+        recommendations_markdown = llm_recommendations.get('recommendations_markdown', '')
         executive_insights = llm_recommendations.get('executive_insights', [])
         priority_actions = llm_recommendations.get('priority_actions', [])
 
@@ -861,12 +851,17 @@
         """
         priority_html += "</div>"
 
+        # Convert markdown recommendations to HTML
         recommendations_html = ""
-        if
-        recommendations_html = "
-
-
-
+        if recommendations_markdown:
+            recommendations_html = f"""
+            <div class='llm-recommendations'>
+                <h4>🤖 AI-Generated Recommendations</h4>
+                <div class="markdown-content">
+                    {self._markdown_to_html(recommendations_markdown)}
+                </div>
+            </div>
+            """
 
         return f"""
         <div class="card">
@@ -1258,6 +1253,56 @@
             text-align: center;
         }}
 
+        .markdown-content {{
+            line-height: 1.6;
+            color: #2c3e50;
+        }}
+
+        .markdown-content h1 {{
+            color: #2c3e50;
+            border-bottom: 2px solid #3498db;
+            padding-bottom: 10px;
+            margin-top: 30px;
+            margin-bottom: 20px;
+        }}
+
+        .markdown-content h2 {{
+            color: #34495e;
+            margin-top: 25px;
+            margin-bottom: 15px;
+            font-size: 1.3em;
+        }}
+
+        .markdown-content h3 {{
+            color: #34495e;
+            margin-top: 20px;
+            margin-bottom: 10px;
+            font-size: 1.1em;
+        }}
+
+        .markdown-content strong {{
+            color: #2c3e50;
+            font-weight: 600;
+        }}
+
+        .markdown-content ul {{
+            margin: 15px 0;
+            padding-left: 20px;
+        }}
+
+        .markdown-content li {{
+            margin-bottom: 8px;
+            line-height: 1.5;
+        }}
+
+        .llm-recommendations {{
+            background: #f8f9fa;
+            border-left: 4px solid #3498db;
+            padding: 20px;
+            margin: 20px 0;
+            border-radius: 0 8px 8px 0;
+        }}
+
         @media (max-width: 768px) {{
             .report-container {{
                 padding: 10px;
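The new `_markdown_to_html` helper above relies on a handful of regexes rather than a markdown library. A trimmed-down, standalone sketch of the same idea (headers and bullets only; the real method also handles bold text, line breaks, and `<br>` cleanup):

```python
import re


def markdown_to_html_sketch(markdown_text: str) -> str:
    """Convert ## headings and - bullets into HTML, mirroring the regex approach above."""
    html = markdown_text
    html = re.sub(r'^### (.*?)$', r'<h3>\1</h3>', html, flags=re.MULTILINE)
    html = re.sub(r'^## (.*?)$', r'<h2>\1</h2>', html, flags=re.MULTILINE)
    html = re.sub(r'^- (.*?)$', r'<li>\1</li>', html, flags=re.MULTILINE)
    # Wrap runs of consecutive <li> items in a single <ul>.
    html = re.sub(r'(<li>.*?</li>(?:\s*<li>.*?</li>)*)', r'<ul>\1</ul>', html, flags=re.DOTALL)
    return html


# Prints the heading as an <h2> and both bullets wrapped in one <ul>.
print(markdown_to_html_sketch("## Quick wins\n- Compress hero images\n- Add meta descriptions"))
```

Note that neither the sketch nor the committed helper escapes its input, so any raw HTML in the LLM output passes straight into the report.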
simple_pdf_generator.py
CHANGED

@@ -17,13 +17,10 @@ class SimplePDFGenerator:
         self.available = False
 
     def generate_pdf(self, html_content: str) -> bytes:
-        """
-        Generate PDF from HTML content with better formatting
-        """
         if not self.available:
             raise ImportError("PDF generation requires reportlab: pip install reportlab")
 
-
+
         from reportlab.pdfgen import canvas
         from reportlab.lib.pagesizes import letter, A4
         from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
@@ -36,10 +33,10 @@
         # Parse HTML and extract content
         soup = BeautifulSoup(html_content, 'html.parser')
 
-
+
         buffer = io.BytesIO()
 
-
+
         doc = SimpleDocTemplate(
             buffer,
             pagesize=A4,
@@ -49,17 +46,17 @@
             rightMargin=0.75*inch
         )
 
-
+
         styles = getSampleStyleSheet()
 
-
+
         title_style = ParagraphStyle(
             'CustomTitle',
             parent=styles['Heading1'],
             fontSize=24,
             textColor=black,
             spaceAfter=20,
-            alignment=1
+            alignment=1
         )
 
         header_style = ParagraphStyle(
@@ -82,7 +79,7 @@
 
         story = []
 
-
+
         title = "SEO Analysis Report"
         url_elem = soup.find(string=re.compile(r'https?://'))
         if url_elem:
@@ -93,13 +90,13 @@
         story.append(Paragraph(title, title_style))
         story.append(Spacer(1, 20))
 
-
+
         self._extract_executive_summary(soup, story, header_style, styles['Normal'])
         self._extract_technical_seo(soup, story, header_style, subheader_style, styles['Normal'])
         self._extract_content_audit(soup, story, header_style, subheader_style, styles['Normal'])
         self._extract_recommendations(soup, story, header_style, styles['Normal'])
 
-
+
         doc.build(story)
 
         # Get PDF data
@@ -107,12 +104,11 @@
         return buffer.getvalue()
 
     def _extract_executive_summary(self, soup, story, header_style, normal_style):
-        """Extract executive summary section"""
         exec_section = soup.find(string=re.compile(r'Executive Summary', re.I))
         if exec_section:
             story.append(Paragraph("Executive Summary", header_style))
 
-
+
             health_text = soup.find(string=re.compile(r'Overall SEO Health', re.I))
             if health_text:
                 parent = health_text.find_parent()
@@ -122,14 +118,13 @@
             story.append(Spacer(1, 10))
 
     def _extract_technical_seo(self, soup, story, header_style, subheader_style, normal_style):
-        """Extract technical SEO section"""
         tech_section = soup.find(string=re.compile(r'Technical SEO', re.I))
         if tech_section:
             story.append(Paragraph("Technical SEO Analysis", header_style))
 
-
+
             perf_elements = soup.find_all(string=re.compile(r'Performance Score|Mobile|Desktop', re.I))
-            for elem in perf_elements[:3]:
+            for elem in perf_elements[:3]:
                 parent = elem.find_parent()
                 if parent:
                     text = parent.get_text().strip()
@@ -138,14 +133,13 @@
             story.append(Spacer(1, 10))
 
     def _extract_content_audit(self, soup, story, header_style, subheader_style, normal_style):
-        """Extract content audit section"""
         content_section = soup.find(string=re.compile(r'Content Audit', re.I))
         if content_section:
             story.append(Paragraph("Content Audit", header_style))
 
-
+
             content_elements = soup.find_all(string=re.compile(r'Pages Analyzed|Metadata|Word Count', re.I))
-            for elem in content_elements[:3]:
+            for elem in content_elements[:3]:
                 parent = elem.find_parent()
                 if parent:
                     text = parent.get_text().strip()
@@ -154,23 +148,19 @@
             story.append(Spacer(1, 10))
 
     def _extract_recommendations(self, soup, story, header_style, normal_style):
-        """Extract recommendations section"""
         rec_section = soup.find(string=re.compile(r'Recommendation', re.I))
         if rec_section:
             story.append(Paragraph("Recommendations", header_style))
 
-
+
             rec_elements = soup.find_all('li')
-            for elem in rec_elements[:5]:
+            for elem in rec_elements[:5]:
                 text = elem.get_text().strip()
                 if len(text) > 15:
                     story.append(Paragraph(f"• {text}", normal_style))
                     story.append(Spacer(1, 10))
 
 def create_browser_pdf_instructions() -> str:
-    """
-    Return instructions for manual PDF creation using browser
-    """
     return """
 ## How to Create PDF from HTML Report:
 
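The `generate_pdf` flow above builds a `SimpleDocTemplate` over an in-memory buffer, appends flowables to a story list, and returns the resulting bytes. A minimal standalone sketch of that flow follows; section extraction from the HTML report is omitted, and the `reportlab.platypus` import path is an assumption here, since the file's own flowable imports are not visible in this hunk.

```python
import io

from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.platypus import Paragraph, SimpleDocTemplate, Spacer


def build_minimal_pdf(title: str, bullets: list) -> bytes:
    """Render a title and a few bullet lines into PDF bytes."""
    buffer = io.BytesIO()
    doc = SimpleDocTemplate(buffer, pagesize=A4)
    styles = getSampleStyleSheet()

    story = [Paragraph(title, styles["Heading1"]), Spacer(1, 20)]
    for item in bullets:
        story.append(Paragraph(f"• {item}", styles["Normal"]))

    doc.build(story)
    return buffer.getvalue()


if __name__ == "__main__":
    pdf_bytes = build_minimal_pdf("SEO Analysis Report", ["Fix render-blocking resources"])
    with open("sample_report.pdf", "wb") as fh:
        fh.write(pdf_bytes)
```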