Commit 9bf19c4 · Parent(s): 795900a
ok

Files changed:
- app.py +28 -30
- llm_recommendations.py +90 -67
- modules/backlinks.py +11 -16
- modules/content_audit.py +0 -11
- modules/keywords.py +3 -19
- modules/technical_seo.py +2 -14
- report_generator.py +99 -54
- simple_pdf_generator.py +16 -26
app.py
CHANGED
@@ -1,4 +1,4 @@
-
+
 from flask import Flask, render_template, request, jsonify, send_file, redirect, url_for
 import validators
 import os
@@ -7,7 +7,7 @@ import uuid
 from urllib.parse import urlparse
 from typing import Dict, Any
 
-
+
 from modules.technical_seo import TechnicalSEOModule
 from modules.content_audit import ContentAuditModule
 from modules.keywords import KeywordsModule
@@ -19,7 +19,7 @@ from llm_recommendations import LLMRecommendations
 app = Flask(__name__, static_folder='static')
 app.secret_key = 'seo_report_generator_2024'
 
-
+
 technical_module = TechnicalSEOModule()
 content_module = ContentAuditModule()
 keywords_module = KeywordsModule()
@@ -28,11 +28,10 @@ report_gen = ReportGenerator()
 pdf_gen = SimplePDFGenerator()
 llm_recommendations = LLMRecommendations()
 
-
+
 reports_store = {}
 
 def _transform_keywords_data(new_data: Dict[str, Any]) -> Dict[str, Any]:
-    """Transform new keywords data structure to match report generator expectations"""
     if not new_data or new_data.get('placeholder'):
         return {
             'placeholder': True,
@@ -44,7 +43,7 @@ def _transform_keywords_data(new_data: Dict[str, Any]) -> Dict[str, Any]:
             'data_source': 'Analysis failed'
         }
 
-
+
     totals = new_data.get('totals', {})
     distribution = new_data.get('distribution', {})
     movement = new_data.get('movement', {})
@@ -53,7 +52,7 @@ def _transform_keywords_data(new_data: Dict[str, Any]) -> Dict[str, Any]:
    opportunities = new_data.get('opportunities', [])
    data_sources = new_data.get('data_sources', {})
 
-
+
    pos_dist = {
        'top_3': distribution.get('top3', 0),
        'top_10': distribution.get('top10', 0),
@@ -61,27 +60,27 @@ def _transform_keywords_data(new_data: Dict[str, Any]) -> Dict[str, Any]:
        'beyond_50': totals.get('keywords', 0) - distribution.get('top50', 0)
    }
 
-
+
    transformed_best_keywords = []
    for kw in best_keywords:
        transformed_best_keywords.append({
            'keyword': kw.get('keyword', ''),
            'position': kw.get('rank', 0),
-            'clicks': 0,
+            'clicks': 0,
            'impressions': kw.get('volume', 0),
            'url': kw.get('url', ''),
            'estimated_traffic': kw.get('estimated_traffic', 0),
            'trend': kw.get('trend', 'stable')
        })
 
-
+
    transformed_opportunities = []
    for opp in opportunities:
        transformed_opportunities.append({
            'keyword': opp.get('keyword', ''),
-            'position': 0,
+            'position': 0,
            'impressions': opp.get('volume', 0),
-            'ctr': 0,
+            'ctr': 0,
            'competitor_rank': opp.get('competitor_rank', 0),
            'priority_score': opp.get('priority_score', 0),
            'competitor_domain': opp.get('competitor_domain', '')
@@ -119,30 +118,30 @@ def generate_report():
    if not validators.url(url):
        return jsonify({'error': 'Please enter a valid URL'}), 400
 
-
+
    report_id = str(uuid.uuid4())
 
-
+
    competitor_domains = []
    competitor_list = []
    for comp in competitors:
        comp = comp.strip()
        if comp and validators.url(comp):
            competitor_list.append(comp)
-
+
            domain = urlparse(comp).netloc.replace('www.', '')
            competitor_domains.append(domain)
 
-
+
    technical_data = technical_module.analyze(url)
 
-
+
    content_data = content_module.analyze(url)
 
-
+
    keywords_result = keywords_module.analyze(url, competitor_domains=competitor_domains)
    if not keywords_result.success:
-
+
        keywords_data = {
            'placeholder': True,
            'message': f'Keywords analysis failed: {keywords_result.error}',
@@ -153,10 +152,10 @@ def generate_report():
            'data_source': 'Analysis failed'
        }
    else:
-
+
        keywords_data = _transform_keywords_data(keywords_result.data)
 
-
+
    print(f"DEBUG: Starting backlinks analysis for {url}")
    backlinks_result = backlinks_module.analyze(url)
    backlinks_data = backlinks_result.data
@@ -167,18 +166,18 @@ def generate_report():
    if backlinks_data.get('placeholder'):
        print(f"DEBUG: Using placeholder data: {backlinks_data.get('message')}")
 
-
+
    llm_rec_data = llm_recommendations.generate_recommendations(
        url, technical_data, content_data, keywords_data, backlinks_data
    )
 
-
+
    competitor_data = []
    for comp_url in competitor_list:
        comp_technical = technical_module.analyze(comp_url)
        comp_content = content_module.analyze(comp_url, quick_scan=True)
 
-
+
        comp_keywords_result = keywords_module.analyze(comp_url, competitor_domains=[], quick_scan=True)
        if comp_keywords_result.success:
            comp_keywords = _transform_keywords_data(comp_keywords_result.data)
@@ -193,7 +192,7 @@ def generate_report():
                'data_source': 'Analysis failed'
            }
 
-
+
        comp_backlinks_result = backlinks_module.analyze(comp_url, quick_scan=True)
        comp_backlinks = comp_backlinks_result.data
 
@@ -205,7 +204,7 @@ def generate_report():
            'backlinks': comp_backlinks
        })
 
-
+
    report_html = report_gen.generate_html_report(
        url=url,
        technical_data=technical_data,
@@ -217,7 +216,7 @@ def generate_report():
        include_charts=True
    )
 
-
+
    reports_store[report_id] = {
        'url': url,
        'html': report_html,
@@ -256,7 +255,6 @@ def download_html(report_id):
 
    report_data = reports_store[report_id]
 
-    # Create temporary file
    with tempfile.NamedTemporaryFile(mode='w', suffix='.html', delete=False) as f:
        f.write(report_data['html'])
        temp_path = f.name
@@ -273,10 +271,10 @@ def download_pdf(report_id):
    try:
        report_data = reports_store[report_id]
 
-
+
        pdf_data = pdf_gen.generate_pdf(report_data['html'])
 
-
+
        with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as f:
            f.write(pdf_data)
            temp_path = f.name
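
For reference, a minimal sketch (not part of the commit) of the mapping _transform_keywords_data performs; the field names come from the diff above, while the sample values are invented for illustration:

# Illustration only: sample input shaped like the new keywords module output.
sample = {
    'totals': {'keywords': 120},
    'distribution': {'top3': 5, 'top10': 20, 'top50': 80},
    'best_keywords': [{'keyword': 'seo report', 'rank': 2, 'volume': 900,
                       'url': 'https://example.com/', 'estimated_traffic': 150, 'trend': 'up'}],
}

# The transform maps 'rank' -> 'position' and 'volume' -> 'impressions', and fills
# clicks/ctr with zeros because the new data source does not provide them.
pos_dist = {
    'top_3': sample['distribution'].get('top3', 0),
    'top_10': sample['distribution'].get('top10', 0),
    'beyond_50': sample['totals'].get('keywords', 0) - sample['distribution'].get('top50', 0),
}
print(pos_dist)  # {'top_3': 5, 'top_10': 20, 'beyond_50': 40}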
llm_recommendations.py
CHANGED
@@ -1,7 +1,4 @@
-"""
-Groq LLM Integration for Smart SEO Recommendations
-Analyzes all 4 modules (Technical SEO, Content Audit, Keywords, Backlinks) to generate intelligent recommendations
-"""
+
 
 import os
 import json
@@ -9,7 +6,7 @@ from typing import Dict, Any, List
 from groq import Groq
 from dotenv import load_dotenv
 
-
+
 load_dotenv()
 
 
@@ -25,33 +22,20 @@ class LLMRecommendations:
     def generate_recommendations(self, url: str, technical_data: Dict[str, Any],
                                  content_data: Dict[str, Any], keywords_data: Dict[str, Any],
                                  backlinks_data: Dict[str, Any]) -> Dict[str, Any]:
-        """
-        Generate comprehensive SEO recommendations based on all module data
-
-        Args:
-            url: Target website URL
-            technical_data: Technical SEO analysis results
-            content_data: Content audit results
-            keywords_data: Keywords analysis results
-            backlinks_data: Backlinks analysis results
-
-        Returns:
-            Dictionary with recommendations and insights
-        """
        if not self.available:
            return self._generate_fallback_recommendations(technical_data, content_data, keywords_data, backlinks_data)
 
        try:
-
+
            context = self._prepare_context(url, technical_data, content_data, keywords_data, backlinks_data)
 
-
+
            recommendations = self._query_llm(context)
 
            return {
-                '
+                'recommendations_markdown': recommendations,
                'executive_insights': self._generate_executive_insights(context),
-                'priority_actions': self._extract_priority_actions(recommendations),
+                'priority_actions': self._extract_priority_actions([recommendations]),
                'data_source': 'Groq LLM Analysis',
                'generated_at': context['analysis_date']
            }
@@ -61,9 +45,8 @@ class LLMRecommendations:
 
    def _prepare_context(self, url: str, technical_data: Dict, content_data: Dict,
                         keywords_data: Dict, backlinks_data: Dict) -> Dict[str, Any]:
-        """Prepare structured context for LLM analysis"""
 
-
+
        context = {
            'website': url,
            'analysis_date': technical_data.get('last_updated', ''),
@@ -101,7 +84,6 @@ class LLMRecommendations:
        return context
 
    def _query_llm(self, context: Dict[str, Any]) -> List[str]:
-        """Query Groq LLM for SEO recommendations"""
 
        prompt = f"""
 You are an expert SEO consultant analyzing a comprehensive SEO audit for {context['website']}. Based on the data below, provide specific, actionable SEO recommendations.
@@ -143,12 +125,18 @@ CRITICAL INSTRUCTIONS:
 5. Prioritize recommendations by potential impact and ease of implementation
 6. Include technical optimizations, content improvements, keyword opportunities, and link building strategies
 7. Provide estimated timelines and resources needed for each recommendation
+8. IMPORTANT: Use ONLY plain text format with markdown syntax - NO tables, NO complex formatting, NO HTML
+9. Format your response as clean markdown that can be rendered properly
 
-Generate exactly 8-12 specific recommendations
-
+Generate exactly 8-12 specific recommendations using simple markdown format:
+## Priority: HIGH/MEDIUM/LOW
+**Action Title**
+Description with clear steps and expected impact.
+Timeline: X weeks
 
 Priority Levels: HIGH, MEDIUM, LOW
 Focus on actionable items that can be implemented within 30-90 days.
+Use simple markdown formatting only - headers, bold text, and bullet points.
 
 Response:
 """
@@ -158,35 +146,25 @@ Response:
                messages=[
                    {'role': 'user', 'content': prompt}
                ],
-                model="
+                model="openai/gpt-oss-120b",
                stream=False,
-                temperature=0.1,
+                temperature=0.1,
                max_tokens=1500
            )
 
            response = chat_completion.choices[0].message.content.strip()
 
-
-
-
-            for line in lines:
-                line = line.strip()
-                if line.startswith('- **') or line.startswith('•'):
-                    # Clean up the recommendation
-                    recommendation = line.replace('- **', '').replace('• **', '').strip()
-                    if recommendation:
-                        recommendations.append(recommendation)
-
-            return recommendations if recommendations else [response]
+
+            # Return the full markdown response instead of parsing individual recommendations
+            return response
 
        except Exception as e:
            return [f"LLM Error: {str(e)}"]
 
    def _generate_executive_insights(self, context: Dict[str, Any]) -> List[str]:
-        """Generate high-level executive insights"""
        insights = []
 
-
+
        mobile_score = context['technical_seo']['mobile_score']
        desktop_score = context['technical_seo']['desktop_score']
        avg_score = (mobile_score + desktop_score) / 2
@@ -198,7 +176,7 @@ Response:
        else:
            insights.append(f"🟢 Good: Website performance is solid (avg: {avg_score:.0f}/100)")
 
-
+
        pages = context['content_audit']['pages_analyzed']
        if pages > 0:
            metadata = context['content_audit']['metadata_completeness']
@@ -209,7 +187,7 @@ Response:
        else:
            insights.append(f"🟢 Content Quality: Metadata completeness is good ({title_pct:.0f}%)")
 
-
+
        if context['keywords']['data_available']:
            total_keywords = context['keywords']['total_keywords']
            pos_dist = context['keywords']['position_distribution']
@@ -224,7 +202,7 @@ Response:
        else:
            insights.append("📊 Connect keyword tracking tools for visibility insights")
 
-
+
        if context['backlinks']['data_available']:
            ref_domains = context['backlinks']['total_ref_domains']
            domain_rating = context['backlinks']['domain_rating']
@@ -241,22 +219,65 @@ Response:
        return insights
 
    def _extract_priority_actions(self, recommendations: List[str]) -> List[Dict[str, str]]:
-        """Extract priority actions from recommendations"""
        priority_actions = []
 
-
-
-
-
-
-
-
-
-
-
-
+        # Handle the case where recommendations is a single string (markdown)
+        if isinstance(recommendations, list) and len(recommendations) == 1:
+            markdown_text = recommendations[0]
+        elif isinstance(recommendations, str):
+            markdown_text = recommendations
+        else:
+            markdown_text = ""
+
+        # Extract high priority actions from markdown
+        if markdown_text:
+            lines = markdown_text.split('\n')
+            current_priority = None
+            current_title = None
+            current_description = []
+
+            for line in lines:
+                line = line.strip()
+                if line.startswith('## Priority:'):
+                    # Save previous action if exists
+                    if current_title and current_priority == 'HIGH':
+                        priority_actions.append({
+                            'title': current_title,
+                            'description': ' '.join(current_description).strip(),
+                            'priority': 'HIGH'
+                        })
+
+                    # Start new action
+                    current_priority = line.replace('## Priority:', '').strip()
+                    current_title = None
+                    current_description = []
+                elif line.startswith('**') and line.endswith('**'):
+                    current_title = line.replace('**', '').strip()
+                elif line and not line.startswith('#'):
+                    current_description.append(line)
+
+            # Save last action if exists
+            if current_title and current_priority == 'HIGH':
+                priority_actions.append({
+                    'title': current_title,
+                    'description': ' '.join(current_description).strip(),
+                    'priority': 'HIGH'
+                })
+
+        # Fallback for old format
+        if not priority_actions and isinstance(recommendations, list):
+            for rec in recommendations:
+                if '**HIGH**' in rec or '**CRITICAL**' in rec:
+                    parts = rec.replace('**HIGH**', '').replace('**CRITICAL**', '').strip()
+                    if ':' in parts:
+                        title, description = parts.split(':', 1)
+                        priority_actions.append({
+                            'title': title.strip(),
+                            'description': description.strip(),
+                            'priority': 'HIGH'
+                        })
 
-
+
        if not priority_actions and recommendations:
            for i, rec in enumerate(recommendations[:3]):
                if ':' in rec:
@@ -267,15 +288,14 @@ Response:
                        'priority': 'HIGH'
                    })
 
-        return priority_actions[:5]
+        return priority_actions[:5]
 
    def _generate_fallback_recommendations(self, technical_data: Dict, content_data: Dict,
                                           keywords_data: Dict, backlinks_data: Dict, error: str = None) -> Dict[str, Any]:
-        """Generate basic recommendations when LLM is not available"""
 
        recommendations = []
 
-
+
        mobile_score = technical_data.get('mobile_score', 0)
        desktop_score = technical_data.get('desktop_score', 0)
 
@@ -284,7 +304,7 @@ Response:
        if desktop_score < 50:
            recommendations.append("**HIGH** Improve Desktop Performance: Optimize server response time, minimize CSS and JavaScript")
 
-
+
        pages = content_data.get('pages_analyzed', 0)
        if pages > 0:
            metadata = content_data.get('metadata_completeness', {})
@@ -294,7 +314,7 @@ Response:
        if content_data.get('avg_word_count', 0) < 300:
            recommendations.append("**MEDIUM** Enhance Content: Increase average page content length")
 
-
+
        if not keywords_data.get('placeholder', False):
            total_keywords = keywords_data.get('total_keywords', 0)
            pos_dist = keywords_data.get('position_distribution', {})
@@ -304,7 +324,7 @@ Response:
        else:
            recommendations.append("**MEDIUM** Set Up Keyword Tracking: Connect Google Search Console for keyword insights")
 
-
+
        if not backlinks_data.get('placeholder', False):
            ref_domains = backlinks_data.get('total_ref_domains', 0)
            if ref_domains < 50:
@@ -312,7 +332,7 @@ Response:
        else:
            recommendations.append("**MEDIUM** Set Up Backlink Monitoring: Add RapidAPI key for comprehensive link analysis")
 
-
+
        if not recommendations:
            recommendations = [
                "**HIGH** Audit Technical Issues: Review site speed and mobile performance",
@@ -329,8 +349,11 @@ Response:
        if error:
            insights.append(f"❌ LLM Error: {error}")
 
+        # Convert recommendations list to markdown format
+        markdown_recommendations = "\n".join([f"## Priority: HIGH\n**{rec.replace('**HIGH**', '').replace('**MEDIUM**', '').replace('**LOW**', '').strip()}**\n" for rec in recommendations])
+
        return {
-            '
+            'recommendations_markdown': markdown_recommendations,
            'executive_insights': insights,
            'priority_actions': [
                {
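
To illustrate the new flow end to end, here is a standalone sketch of how a response in the `## Priority:` / `**Title**` format requested by the prompt reduces to priority actions; it mirrors the parsing added in _extract_priority_actions above, but is simplified, and the sample LLM response is invented:

sample_response = """## Priority: HIGH
**Improve Mobile Performance**
Compress images and defer non-critical JavaScript.
Timeline: 4 weeks

## Priority: MEDIUM
**Expand Thin Content**
Grow key landing pages past 300 words.
Timeline: 6 weeks
"""

def extract_high_priority(markdown_text):
    # Walk the markdown line by line, collecting title/description per priority block.
    actions, priority, title, description = [], None, None, []
    for line in markdown_text.split('\n'):
        line = line.strip()
        if line.startswith('## Priority:'):
            if title and priority == 'HIGH':
                actions.append({'title': title, 'description': ' '.join(description).strip(), 'priority': 'HIGH'})
            priority, title, description = line.replace('## Priority:', '').strip(), None, []
        elif line.startswith('**') and line.endswith('**'):
            title = line.replace('**', '').strip()
        elif line and not line.startswith('#'):
            description.append(line)
    if title and priority == 'HIGH':
        actions.append({'title': title, 'description': ' '.join(description).strip(), 'priority': 'HIGH'})
    return actions[:5]

print(extract_high_priority(sample_response))
# Only the HIGH block survives: one action titled 'Improve Mobile Performance'.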
modules/backlinks.py
CHANGED
@@ -73,14 +73,12 @@ class BacklinksModule:
         )
 
     def _extract_domain(self, url: str) -> str:
-        """Extract clean domain from URL"""
        if not url.startswith(('http://', 'https://')):
            url = 'https://' + url
        domain = urlparse(url).netloc.replace('www.', '')
        return domain
 
    def _api_request_with_retry(self, url: str, params: Dict = None, headers: Dict = None) -> Optional[Dict]:
-        """Make API request with retry logic"""
        if headers is None:
            headers = self.headers.copy()
 
@@ -90,8 +88,8 @@ class BacklinksModule:
 
                if response.status_code == 200:
                    return response.json()
-                elif response.status_code == 429:
-                    wait_time = (attempt + 1) * 2
+                elif response.status_code == 429:
+                    wait_time = (attempt + 1) * 2
                    print(f"Rate limited, waiting {wait_time}s...")
                    time.sleep(wait_time)
                    continue
@@ -124,7 +122,7 @@ class BacklinksModule:
            # Limit results for quick scan
            if quick_scan:
                return data[:50]
-            return data[:500]
+            return data[:500]
 
        except Exception as e:
            print(f"Individual backlinks API error: {str(e)}")
@@ -132,7 +130,6 @@ class BacklinksModule:
            return []
 
    def _get_majestic_metrics(self, domain: str) -> Dict[str, Any]:
-        """Get Majestic domain metrics via RapidAPI"""
        try:
            headers = self.headers.copy()
            headers['x-rapidapi-host'] = 'majestic1.p.rapidapi.com'
@@ -274,7 +271,7 @@ class BacklinksModule:
 
        # Sort by backlinks count and return top domains
        top_domains = sorted(domain_stats.values(), key=lambda x: x['backlinks'], reverse=True)
-        return top_domains[:20]
+        return top_domains[:20]
 
    def _extract_anchor_distribution(self, backlinks: List[Dict]) -> List[Dict[str, Any]]:
        """Analyze anchor text distribution"""
@@ -282,7 +279,7 @@ class BacklinksModule:
 
        for link in backlinks:
            anchor = link.get('anchor', '').strip()
-            if not anchor or len(anchor) > 100:
+            if not anchor or len(anchor) > 100:
                continue
 
            if anchor not in anchor_stats:
@@ -316,7 +313,7 @@ class BacklinksModule:
 
        # Sort by backlinks count
        anchor_distribution.sort(key=lambda x: x['backlinks'], reverse=True)
-        return anchor_distribution[:15]
+        return anchor_distribution[:15]
 
    def _calculate_monthly_changes(self, backlinks: List[Dict]) -> Dict[str, int]:
        """Calculate monthly backlinks changes"""
@@ -335,14 +332,14 @@ class BacklinksModule:
                link_date = datetime.strptime(first_seen, '%Y-%m-%d')
                if link_date >= last_month:
                    new_links += 1
-                if link_date >= now - timedelta(days=90):
+                if link_date >= now - timedelta(days=90):
                    recent_links += 1
            except Exception:
                continue
 
        return {
            'new_backlinks': new_links,
-            'lost_backlinks': 0,
+            'lost_backlinks': 0,
            'net_change': new_links,
            'recent_backlinks_3m': recent_links
        }
@@ -384,9 +381,9 @@ class BacklinksModule:
 
        # Quality score (0-100)
        quality_score = min(100, (
-            (follow_ratio * 0.4) +
-            (avg_authority * 2) +
-            (min(20, len(set(link.get('url_from', '').split('/')[2] for link in backlinks))) * 1)
+            (follow_ratio * 0.4) +
+            (avg_authority * 2) +
+            (min(20, len(set(link.get('url_from', '').split('/')[2] for link in backlinks))) * 1)
        ))
 
        return {
@@ -398,7 +395,6 @@ class BacklinksModule:
        }
 
    def _get_data_sources(self, individual_backlinks: List, majestic_metrics: Dict, domain_metrics: Dict) -> List[str]:
-        """Track which data sources provided information"""
        sources = []
 
        if individual_backlinks:
@@ -411,7 +407,6 @@ class BacklinksModule:
        return sources or ['No data sources available']
 
    def _generate_no_api_data(self, url: str) -> ModuleResult:
-        """Generate response when no API key is available"""
        domain = self._extract_domain(url)
 
        no_api_data = {
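
As background on the retry behaviour touched above (the elif response.status_code == 429 branch), a minimal standalone sketch of the same linear-backoff pattern; the endpoint and headers in the usage comment are placeholders, not the module's real configuration:

import time
import requests

def api_request_with_retry(url, params=None, headers=None, max_attempts=3):
    """Retry a GET request, backing off when the API answers 429 (rate limited)."""
    for attempt in range(max_attempts):
        response = requests.get(url, params=params, headers=headers, timeout=30)
        if response.status_code == 200:
            return response.json()
        elif response.status_code == 429:
            wait_time = (attempt + 1) * 2  # same linear backoff as in the diff: 2s, 4s, 6s
            print(f"Rate limited, waiting {wait_time}s...")
            time.sleep(wait_time)
            continue
        else:
            break  # other status codes are not retried in this sketch
    return None

# Hypothetical usage; the real module targets RapidAPI backlink endpoints.
# data = api_request_with_retry("https://majestic1.p.rapidapi.com/url_metrics",
#                               headers={"x-rapidapi-key": "..."})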
modules/content_audit.py
CHANGED
@@ -59,7 +59,6 @@ class ContentAuditModule:
             return self._get_fallback_data(url, str(e))
 
     def _get_sitemap_urls(self, base_url: str, limit: int = 200) -> List[str]:
-        """Extract URLs from sitemap.xml"""
        urls = []
 
        # Common sitemap locations
@@ -81,7 +80,6 @@ class ContentAuditModule:
        return urls[:limit]
 
    def _parse_sitemap(self, sitemap_content: bytes, base_url: str, limit: int) -> List[str]:
-        """Parse sitemap XML content"""
        urls = []
 
        try:
@@ -117,7 +115,6 @@ class ContentAuditModule:
        return urls[:limit]
 
    def _crawl_from_homepage(self, base_url: str, limit: int = 50) -> List[str]:
-        """Crawl URLs starting from homepage"""
        urls = set([base_url])
        processed = set()
 
@@ -143,7 +140,6 @@ class ContentAuditModule:
        return list(urls)[:limit]
 
    def _analyze_page(self, url: str) -> Dict[str, Any]:
-        """Analyze a single page"""
        try:
            response = self.session.get(url, timeout=15)
            if response.status_code != 200:
@@ -208,7 +204,6 @@ class ContentAuditModule:
        return soup.get_text()
 
    def _detect_cta(self, soup: BeautifulSoup) -> bool:
-        """Detect presence of call-to-action elements"""
        text_content = soup.get_text().lower()
 
        for keyword in self.cta_keywords:
@@ -225,7 +220,6 @@ class ContentAuditModule:
        return False
 
    def _get_last_modified(self, headers: Dict, soup: BeautifulSoup) -> str:
-        """Get last modified date from headers or meta tags"""
        # Check headers first
        if 'last-modified' in headers:
            return headers['last-modified']
@@ -240,7 +234,6 @@ class ContentAuditModule:
        return ""
 
    def _is_valid_content_url(self, url: str) -> bool:
-        """Check if URL is valid for content analysis"""
        if not url:
            return False
 
@@ -261,7 +254,6 @@ class ContentAuditModule:
        return True
 
    def _is_same_domain(self, url1: str, url2: str) -> bool:
-        """Check if two URLs are from the same domain"""
        try:
            domain1 = urlparse(url1).netloc
            domain2 = urlparse(url2).netloc
@@ -270,7 +262,6 @@ class ContentAuditModule:
        return False
 
    def _calculate_metrics(self, base_url: str, pages_data: List[Dict], quick_scan: bool) -> Dict[str, Any]:
-        """Calculate aggregate metrics from page data"""
        total_pages = len(pages_data)
        valid_pages = [p for p in pages_data if 'error' not in p]
 
@@ -318,7 +309,6 @@ class ContentAuditModule:
        }
 
    def _analyze_content_freshness(self, pages_data: List[Dict]) -> Dict[str, Any]:
-        """Analyze content freshness based on last modified dates"""
        now = datetime.now()
        six_months_ago = now - timedelta(days=180)
        eighteen_months_ago = now - timedelta(days=540)
@@ -361,7 +351,6 @@ class ContentAuditModule:
        }
 
    def _get_fallback_data(self, url: str, error: str) -> Dict[str, Any]:
-        """Return fallback data when analysis fails"""
        return {
            'url': url,
            'error': f"Content audit failed: {error}",
modules/keywords.py
CHANGED
@@ -118,13 +118,11 @@ class KeywordsModule:
         )
 
     def _extract_domain(self, url: str) -> str:
-        """Extract domain from URL"""
        if not url.startswith(('http://', 'https://')):
            url = 'https://' + url
        return urlparse(url).netloc.replace('www.', '')
 
    def _fetch_domain_keywords(self, domain: str, quick_scan: bool) -> Dict[str, Any]:
-        """Fetch keywords data for a domain using Competitors Ranking Keywords API"""
        try:
            all_keywords = []
            offset = 0
@@ -187,7 +185,6 @@ class KeywordsModule:
            return {'success': False, 'error': str(e)}
 
    def _calculate_domain_statistics(self, keywords: List[Dict]) -> Dict[str, Any]:
-        """Calculate domain statistics from keywords data"""
        total_keywords = len(keywords)
 
        # Position distribution
@@ -221,7 +218,6 @@ class KeywordsModule:
 
    def _process_keywords_data(self, main_data: Dict, competitor_data: Dict,
                               domain: str, competitor_domains: List[str]) -> Dict[str, Any]:
-        """Process and structure the keywords data"""
        stats = main_data['statistics']['organic']
        keywords = main_data['keywords']
 
@@ -288,7 +284,6 @@ class KeywordsModule:
        }
 
    def _identify_best_keywords(self, keywords: List[Dict]) -> List[Dict]:
-        """Identify best performing keywords"""
        best_candidates = [
            k for k in keywords
            if k.get('rank', 100) <= 3 and k.get('estimated_traffic_volume', 0) > 10
@@ -310,7 +305,6 @@ class KeywordsModule:
        ]
 
    def _identify_declining_keywords(self, keywords: List[Dict]) -> List[Dict]:
-        """Identify keywords with declining performance"""
        declining_candidates = []
 
        for k in keywords:
@@ -333,7 +327,6 @@ class KeywordsModule:
 
    def _analyze_competitor_gaps(self, main_keywords: List[Dict], competitor_data: Dict,
                                 domain: str, competitor_domains: List[str]) -> Tuple[List[Dict], List[Dict]]:
-        """Analyze competitor gaps and opportunities"""
        opportunities = []
        competitor_summary = []
 
@@ -385,10 +378,9 @@ class KeywordsModule:
        # Sort all opportunities by priority score
        opportunities.sort(key=lambda x: x['priority_score'], reverse=True)
 
-        return opportunities[:50], competitor_summary
+        return opportunities[:50], competitor_summary
 
    def _calculate_opportunity_score(self, competitor_rank: int, search_volume: int, difficulty: int) -> float:
-        """Calculate opportunity score using the PRD algorithm"""
        position_ctr = {1: 28, 2: 15, 3: 11, 4: 8, 5: 7, 10: 2, 20: 1}
 
        # Find closest CTR value
@@ -406,7 +398,6 @@ class KeywordsModule:
        return min(round(score, 1), 100)
 
    def _estimate_difficulty(self, rank: int, volume: int) -> int:
-        """Estimate keyword difficulty based on rank and volume"""
        # Simple heuristic - in practice, this would come from a keyword difficulty API
        if rank <= 3:
            return 20 + (volume // 1000) * 5
@@ -416,7 +407,6 @@ class KeywordsModule:
        return 50 + (volume // 1000) * 2
 
    def _enrich_keywords_data(self, keywords: List[Dict]) -> List[Dict]:
-        """Enrich keywords with volume and CPC data"""
        # Identify keywords needing enrichment
        keywords_to_enrich = [
            k for k in keywords
@@ -445,7 +435,6 @@ class KeywordsModule:
        return enriched_keywords
 
    def _batch_enrich_keywords(self, keywords: List[str]) -> Dict[str, Dict]:
-        """Batch enrich keywords using Google Keyword Insight API"""
        enriched_data = {}
 
        # Process in batches
@@ -518,17 +507,14 @@ class KeywordsModule:
        return enriched_data
 
    def _get_cache_key(self, keyword: str) -> str:
-        """Generate cache key for keyword"""
        return hashlib.md5(keyword.lower().encode()).hexdigest()
 
    def _calculate_enrichment_rate(self, keywords: List[Dict]) -> float:
-        """Calculate the percentage of keywords with volume data"""
        enriched = sum(1 for k in keywords if k.get('avg_search_volume', 0) > 0)
        total = len(keywords)
        return round(enriched / total * 100, 1) if total > 0 else 0
 
    def _determine_trend(self, keyword_data: Dict) -> str:
-        """Determine keyword trend based on rank changes"""
        current_rank = keyword_data.get('rank', 100)
        previous_rank = keyword_data.get('previous_rank', 100)
 
@@ -542,13 +528,11 @@ class KeywordsModule:
        return 'stable'
 
    def _rate_limit_primary_api(self):
-        """Rate limiting for primary API (60 requests/minute)"""
        current_time = time.time()
-        if current_time - self.last_primary_call < 1:
+        if current_time - self.last_primary_call < 1:
            time.sleep(1)
 
    def _rate_limit_enrichment_api(self):
-        """Rate limiting for enrichment API (100 requests/minute)"""
        current_time = time.time()
-        if current_time - self.last_enrichment_call < 0.6:
+        if current_time - self.last_enrichment_call < 0.6:
            time.sleep(0.6)
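
For completeness, the rate-limiting helpers touched above boil down to a "sleep if the last call was too recent" guard. A small standalone sketch follows; the one-second and 0.6-second gaps come straight from the diff, the class wrapper and names are illustrative:

import time

class RateLimiter:
    """Sleep long enough to keep calls at or below a rough per-minute budget."""
    def __init__(self, min_gap_seconds):
        self.min_gap = min_gap_seconds  # e.g. 1.0 ~ 60 req/min, 0.6 ~ 100 req/min
        self.last_call = 0.0

    def wait(self):
        now = time.time()
        if now - self.last_call < self.min_gap:
            time.sleep(self.min_gap)  # fixed pause, mirroring the module's behaviour
        self.last_call = time.time()

primary_api = RateLimiter(1.0)     # mirrors _rate_limit_primary_api
enrichment_api = RateLimiter(0.6)  # mirrors _rate_limit_enrichment_api
primary_api.wait()  # call before each API request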
modules/technical_seo.py
CHANGED

@@ -4,12 +4,6 @@ from typing import Dict, Any, Optional
 
 class TechnicalSEOModule:
     def __init__(self, api_key: Optional[str] = None):
-        """
-        Initialize Technical SEO module
-
-        Args:
-            api_key: Google PageSpeed Insights API key (optional for basic usage)
-        """
         self.api_key = api_key
         self.base_url = "https://www.googleapis.com/pagespeedonline/v5/runPagespeed"
 
@@ -45,7 +39,6 @@ class TechnicalSEOModule:
         return self._get_fallback_data(url, str(e))
 
     def _get_pagespeed_data(self, url: str, strategy: str) -> Dict[str, Any]:
-        """Get PageSpeed Insights data for URL and strategy"""
         params = {
             'url': url,
             'strategy': strategy,
@@ -64,7 +57,6 @@
         raise
 
     def _extract_metrics(self, data: Dict[str, Any], strategy: str) -> Dict[str, Any]:
-        """Extract key performance metrics from PageSpeed data"""
         lighthouse_result = data.get('lighthouseResult', {})
         categories = lighthouse_result.get('categories', {})
         audits = lighthouse_result.get('audits', {})
@@ -91,7 +83,6 @@
         }
 
     def _extract_core_web_vitals(self, mobile_data: Dict[str, Any], desktop_data: Dict[str, Any]) -> Dict[str, Any]:
-        """Extract Core Web Vitals metrics"""
         def get_metric_value(data, metric_key):
             audits = data.get('lighthouseResult', {}).get('audits', {})
             metric = audits.get(metric_key, {})
@@ -116,7 +107,6 @@
         }
 
     def _extract_opportunities(self, mobile_data: Dict[str, Any], desktop_data: Dict[str, Any]) -> Dict[str, Any]:
-        """Extract optimization opportunities"""
         mobile_audits = mobile_data.get('lighthouseResult', {}).get('audits', {})
 
         opportunities = []
@@ -128,7 +118,7 @@
 
         for key in opportunity_keys:
             audit = mobile_audits.get(key, {})
-            if audit.get('score', 1) < 0.9:
+            if audit.get('score', 1) < 0.9:
                 opportunities.append({
                     'id': key,
                     'title': audit.get('title', key.replace('-', ' ').title()),
@@ -137,10 +127,9 @@
                     'potential_savings': audit.get('details', {}).get('overallSavingsMs', 0)
                 })
 
-        return {'opportunities': opportunities[:5]}
+        return {'opportunities': opportunities[:5]}
 
     def _extract_diagnostics(self, mobile_data: Dict[str, Any], desktop_data: Dict[str, Any]) -> Dict[str, Any]:
-        """Extract diagnostic information"""
         mobile_audits = mobile_data.get('lighthouseResult', {}).get('audits', {})
 
         diagnostics = []
@@ -162,7 +151,6 @@
         return {'diagnostics': diagnostics}
 
     def _get_fallback_data(self, url: str, error: str) -> Dict[str, Any]:
-        """Return fallback data when API fails"""
         return {
             'url': url,
             'error': f"PageSpeed API unavailable: {error}",
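For context on the `_extract_opportunities` hunk above: audits scoring below 0.9 are kept as opportunities and capped at five. A self-contained sketch of that filtering logic follows; the function name and the sample audit payload are fabricated for illustration, and only the threshold, cap, and field names mirror the diff.

```python
from typing import Any, Dict, List


def pick_opportunities(audits: Dict[str, Dict[str, Any]],
                       keys: List[str],
                       score_threshold: float = 0.9,
                       limit: int = 5) -> List[Dict[str, Any]]:
    """Keep audits scoring below the threshold, capped at `limit` entries."""
    opportunities = []
    for key in keys:
        audit = audits.get(key, {})
        # Missing scores default to 1 (passing), so they are never flagged.
        if audit.get("score", 1) < score_threshold:
            opportunities.append({
                "id": key,
                "title": audit.get("title", key.replace("-", " ").title()),
                "potential_savings": audit.get("details", {}).get("overallSavingsMs", 0),
            })
    return opportunities[:limit]


# Example with a fabricated payload shaped like Lighthouse audit output:
sample_audits = {
    "render-blocking-resources": {
        "score": 0.4,
        "title": "Eliminate render-blocking resources",
        "details": {"overallSavingsMs": 780},
    },
    "uses-optimized-images": {"score": 1.0, "title": "Efficiently encode images", "details": {}},
}
print(pick_opportunities(sample_audits, list(sample_audits)))
```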
report_generator.py
CHANGED

@@ -5,11 +5,45 @@ import plotly.graph_objects as go
 import plotly.express as px
 from plotly.offline import plot
 import plotly
+import re
 
 class ReportGenerator:
     def __init__(self):
         self.report_template = self._get_report_template()
 
+    def _markdown_to_html(self, markdown_text: str) -> str:
+        """Convert simple markdown to HTML"""
+        if not markdown_text:
+            return ""
+
+        html = markdown_text
+
+        # Convert headers
+        html = re.sub(r'^### (.*?)$', r'<h3>\1</h3>', html, flags=re.MULTILINE)
+        html = re.sub(r'^## (.*?)$', r'<h2>\1</h2>', html, flags=re.MULTILINE)
+        html = re.sub(r'^# (.*?)$', r'<h1>\1</h1>', html, flags=re.MULTILINE)
+
+        # Convert bold text
+        html = re.sub(r'\*\*(.*?)\*\*', r'<strong>\1</strong>', html)
+
+        # Convert bullet points
+        html = re.sub(r'^- (.*?)$', r'<li>\1</li>', html, flags=re.MULTILINE)
+        html = re.sub(r'^• (.*?)$', r'<li>\1</li>', html, flags=re.MULTILINE)
+
+        # Wrap consecutive <li> tags in <ul>
+        html = re.sub(r'(<li>.*?</li>(?:\s*<li>.*?</li>)*)', r'<ul>\1</ul>', html, flags=re.DOTALL)
+
+        # Convert line breaks to <br> tags
+        html = html.replace('\n', '<br>')
+
+        # Clean up extra <br> tags around block elements
+        html = re.sub(r'<br>\s*(<h[1-6]>)', r'\1', html)
+        html = re.sub(r'(</h[1-6]>)\s*<br>', r'\1', html)
+        html = re.sub(r'<br>\s*(<ul>)', r'\1', html)
+        html = re.sub(r'(</ul>)\s*<br>', r'\1', html)
+
+        return html
+
     def generate_html_report(self, url: str, technical_data: Dict[str, Any],
                              content_data: Dict[str, Any], competitor_data: List[Dict] = None,
                              keywords_data: Dict[str, Any] = None, backlinks_data: Dict[str, Any] = None,
@@ -44,8 +78,7 @@
         if competitor_data:
             competitor_section = self._generate_competitor_section(competitor_data, technical_data, content_data)
 
-
-        placeholder_sections = self._generate_placeholder_sections()
+
 
         # Generate recommendations
         recommendations = self._generate_recommendations(technical_data, content_data)
@@ -61,7 +94,7 @@
             keywords_section=keywords_section,
             backlinks_section=backlinks_section,
             competitor_section=competitor_section,
-
+
             recommendations=recommendations,
             llm_recommendations=recommendations_section
         )
@@ -538,50 +571,7 @@
 
         return comparison_html
 
-
-        """Generate placeholder sections for future modules"""
-        return """
-        <div class="placeholder-sections">
-            <div class="placeholder-section">
-                <h3>🔍 Keyword Rankings</h3>
-                <div class="placeholder-content">
-                    <p><em>Coming in future versions</em></p>
-                    <ul>
-                        <li>Google Search Console integration</li>
-                        <li>Keyword ranking positions</li>
-                        <li>Search volume analysis</li>
-                        <li>Keyword opportunities</li>
-                    </ul>
-                </div>
-            </div>
-
-            <div class="placeholder-section">
-                <h3>🔗 Backlink Profile</h3>
-                <div class="placeholder-content">
-                    <p><em>Coming in future versions</em></p>
-                    <ul>
-                        <li>Total backlinks and referring domains</li>
-                        <li>Domain authority metrics</li>
-                        <li>Anchor text analysis</li>
-                        <li>Link acquisition opportunities</li>
-                    </ul>
-                </div>
-            </div>
-
-            <div class="placeholder-section">
-                <h3>📈 Conversion Tracking</h3>
-                <div class="placeholder-content">
-                    <p><em>Coming in future versions</em></p>
-                    <ul>
-                        <li>Google Analytics integration</li>
-                        <li>Organic traffic conversion rates</li>
-                        <li>Goal completion tracking</li>
-                        <li>Revenue attribution</li>
-                    </ul>
-                </div>
-            </div>
-        </div>
-        """
+
 
     def _generate_recommendations(self, technical_data: Dict[str, Any], content_data: Dict[str, Any]) -> str:
         """Generate prioritized recommendations"""
@@ -830,11 +820,11 @@
         """
 
     def _generate_recommendations_section(self, llm_recommendations: Dict[str, Any]) -> str:
-        """Generate LLM-powered recommendations section"""
+        """Generate LLM-powered recommendations section with markdown rendering"""
         if not llm_recommendations:
             return ""
 
-
+        recommendations_markdown = llm_recommendations.get('recommendations_markdown', '')
         executive_insights = llm_recommendations.get('executive_insights', [])
         priority_actions = llm_recommendations.get('priority_actions', [])
 
@@ -861,12 +851,17 @@
         """
         priority_html += "</div>"
 
+        # Convert markdown recommendations to HTML
         recommendations_html = ""
-        if
-        recommendations_html = "
-
-
-
+        if recommendations_markdown:
+            recommendations_html = f"""
+            <div class='llm-recommendations'>
+                <h4>🤖 AI-Generated Recommendations</h4>
+                <div class="markdown-content">
+                    {self._markdown_to_html(recommendations_markdown)}
+                </div>
+            </div>
+            """
 
         return f"""
         <div class="card">
@@ -1258,6 +1253,56 @@
             text-align: center;
         }}
 
+        .markdown-content {{
+            line-height: 1.6;
+            color: #2c3e50;
+        }}
+
+        .markdown-content h1 {{
+            color: #2c3e50;
+            border-bottom: 2px solid #3498db;
+            padding-bottom: 10px;
+            margin-top: 30px;
+            margin-bottom: 20px;
+        }}
+
+        .markdown-content h2 {{
+            color: #34495e;
+            margin-top: 25px;
+            margin-bottom: 15px;
+            font-size: 1.3em;
+        }}
+
+        .markdown-content h3 {{
+            color: #34495e;
+            margin-top: 20px;
+            margin-bottom: 10px;
+            font-size: 1.1em;
+        }}
+
+        .markdown-content strong {{
+            color: #2c3e50;
+            font-weight: 600;
+        }}
+
+        .markdown-content ul {{
+            margin: 15px 0;
+            padding-left: 20px;
+        }}
+
+        .markdown-content li {{
+            margin-bottom: 8px;
+            line-height: 1.5;
+        }}
+
+        .llm-recommendations {{
+            background: #f8f9fa;
+            border-left: 4px solid #3498db;
+            padding: 20px;
+            margin: 20px 0;
+            border-radius: 0 8px 8px 0;
+        }}
+
         @media (max-width: 768px) {{
             .report-container {{
                 padding: 10px;
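The new `_markdown_to_html` helper above relies on a handful of regexes rather than a markdown library. A trimmed-down, standalone sketch of the same idea (headers and bullets only; the real method also handles bold text, line breaks, and `<br>` cleanup):

```python
import re


def markdown_to_html_sketch(markdown_text: str) -> str:
    """Convert ## headings and - bullets into HTML, mirroring the regex approach above."""
    html = markdown_text
    html = re.sub(r'^### (.*?)$', r'<h3>\1</h3>', html, flags=re.MULTILINE)
    html = re.sub(r'^## (.*?)$', r'<h2>\1</h2>', html, flags=re.MULTILINE)
    html = re.sub(r'^- (.*?)$', r'<li>\1</li>', html, flags=re.MULTILINE)
    # Wrap runs of consecutive <li> items in a single <ul>.
    html = re.sub(r'(<li>.*?</li>(?:\s*<li>.*?</li>)*)', r'<ul>\1</ul>', html, flags=re.DOTALL)
    return html


# Prints the heading as an <h2> and both bullets wrapped in one <ul>.
print(markdown_to_html_sketch("## Quick wins\n- Compress hero images\n- Add meta descriptions"))
```

Note that neither the sketch nor the committed helper escapes its input, so any raw HTML in the LLM output passes straight into the report.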
simple_pdf_generator.py
CHANGED

@@ -17,13 +17,10 @@ class SimplePDFGenerator:
         self.available = False
 
     def generate_pdf(self, html_content: str) -> bytes:
-        """
-        Generate PDF from HTML content with better formatting
-        """
         if not self.available:
             raise ImportError("PDF generation requires reportlab: pip install reportlab")
 
-
+
         from reportlab.pdfgen import canvas
         from reportlab.lib.pagesizes import letter, A4
         from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
@@ -36,10 +33,10 @@
         # Parse HTML and extract content
         soup = BeautifulSoup(html_content, 'html.parser')
 
-
+
         buffer = io.BytesIO()
 
-
+
         doc = SimpleDocTemplate(
             buffer,
             pagesize=A4,
@@ -49,17 +46,17 @@
             rightMargin=0.75*inch
         )
 
-
+
         styles = getSampleStyleSheet()
 
-
+
         title_style = ParagraphStyle(
             'CustomTitle',
             parent=styles['Heading1'],
             fontSize=24,
             textColor=black,
             spaceAfter=20,
-            alignment=1
+            alignment=1
         )
 
         header_style = ParagraphStyle(
@@ -82,7 +79,7 @@
 
         story = []
 
-
+
         title = "SEO Analysis Report"
         url_elem = soup.find(string=re.compile(r'https?://'))
         if url_elem:
@@ -93,13 +90,13 @@
         story.append(Paragraph(title, title_style))
         story.append(Spacer(1, 20))
 
-
+
         self._extract_executive_summary(soup, story, header_style, styles['Normal'])
         self._extract_technical_seo(soup, story, header_style, subheader_style, styles['Normal'])
         self._extract_content_audit(soup, story, header_style, subheader_style, styles['Normal'])
         self._extract_recommendations(soup, story, header_style, styles['Normal'])
 
-
+
         doc.build(story)
 
         # Get PDF data
@@ -107,12 +104,11 @@
         return buffer.getvalue()
 
     def _extract_executive_summary(self, soup, story, header_style, normal_style):
-        """Extract executive summary section"""
         exec_section = soup.find(string=re.compile(r'Executive Summary', re.I))
         if exec_section:
             story.append(Paragraph("Executive Summary", header_style))
 
-
+
             health_text = soup.find(string=re.compile(r'Overall SEO Health', re.I))
             if health_text:
                 parent = health_text.find_parent()
@@ -122,14 +118,13 @@
             story.append(Spacer(1, 10))
 
     def _extract_technical_seo(self, soup, story, header_style, subheader_style, normal_style):
-        """Extract technical SEO section"""
         tech_section = soup.find(string=re.compile(r'Technical SEO', re.I))
         if tech_section:
             story.append(Paragraph("Technical SEO Analysis", header_style))
 
-
+
             perf_elements = soup.find_all(string=re.compile(r'Performance Score|Mobile|Desktop', re.I))
-            for elem in perf_elements[:3]:
+            for elem in perf_elements[:3]:
                 parent = elem.find_parent()
                 if parent:
                     text = parent.get_text().strip()
@@ -138,14 +133,13 @@
             story.append(Spacer(1, 10))
 
     def _extract_content_audit(self, soup, story, header_style, subheader_style, normal_style):
-        """Extract content audit section"""
         content_section = soup.find(string=re.compile(r'Content Audit', re.I))
         if content_section:
             story.append(Paragraph("Content Audit", header_style))
 
-
+
             content_elements = soup.find_all(string=re.compile(r'Pages Analyzed|Metadata|Word Count', re.I))
-            for elem in content_elements[:3]:
+            for elem in content_elements[:3]:
                 parent = elem.find_parent()
                 if parent:
                     text = parent.get_text().strip()
@@ -154,23 +148,19 @@
             story.append(Spacer(1, 10))
 
     def _extract_recommendations(self, soup, story, header_style, normal_style):
-        """Extract recommendations section"""
         rec_section = soup.find(string=re.compile(r'Recommendation', re.I))
         if rec_section:
             story.append(Paragraph("Recommendations", header_style))
 
-
+
             rec_elements = soup.find_all('li')
-            for elem in rec_elements[:5]:
+            for elem in rec_elements[:5]:
                 text = elem.get_text().strip()
                 if len(text) > 15:
                     story.append(Paragraph(f"• {text}", normal_style))
                     story.append(Spacer(1, 10))
 
 def create_browser_pdf_instructions() -> str:
-    """
-    Return instructions for manual PDF creation using browser
-    """
     return """
 ## How to Create PDF from HTML Report:
 
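The `generate_pdf` flow above builds a `SimpleDocTemplate` over an in-memory buffer, appends flowables to a story list, and returns the resulting bytes. A minimal standalone sketch of that flow follows; section extraction from the HTML report is omitted, and the `reportlab.platypus` import path is an assumption here, since the file's own flowable imports are not visible in this hunk.

```python
import io

from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.platypus import Paragraph, SimpleDocTemplate, Spacer


def build_minimal_pdf(title: str, bullets: list) -> bytes:
    """Render a title and a few bullet lines into PDF bytes."""
    buffer = io.BytesIO()
    doc = SimpleDocTemplate(buffer, pagesize=A4)
    styles = getSampleStyleSheet()

    story = [Paragraph(title, styles["Heading1"]), Spacer(1, 20)]
    for item in bullets:
        story.append(Paragraph(f"• {item}", styles["Normal"]))

    doc.build(story)
    return buffer.getvalue()


if __name__ == "__main__":
    pdf_bytes = build_minimal_pdf("SEO Analysis Report", ["Fix render-blocking resources"])
    with open("sample_report.pdf", "wb") as fh:
        fh.write(pdf_bytes)
```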