#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
前端页面完整性验证脚本
检查页面清单中定义的所有页面是否都已生成，并验证页面代码的完整性

注意：验证时会考虑CSS处理策略
- 原始HTML文件应包含完整的CSS样式
- 拼接后的文档会移除CSS，只保留HTML结构和JavaScript
- 验证会检查CSS省略标记的存在
"""

import os
import json
import re
from pathlib import Path

class Colors:
    """ANSI escape sequences used to colorize terminal output.

    Each attribute is a string that switches the terminal's text color when
    printed; ``NC`` switches it back.
    """

    # Reset sequence ("No Color"); appended after colored text.
    NC = '\033[0m'

    # Foreground colors written with attribute 0.
    RED = '\033[0;31m'
    GREEN = '\033[0;32m'
    BLUE = '\033[0;34m'
    PURPLE = '\033[0;35m'
    CYAN = '\033[0;36m'

    # Yellow is the only entry written with attribute 1 instead of 0.
    YELLOW = '\033[1;33m'

def print_colored(color, message):
    """Print *message* wrapped in a color sequence and a trailing reset.

    Args:
        color: Escape sequence emitted before the message (for example one of
            the ``Colors`` attributes).
        message: Value to print; it is formatted into the output string.
    """
    # The reset sequence goes last so later output is not left colored.
    colored_text = f"{color}{message}{Colors.NC}"
    print(colored_text)

def print_success(message):
    """Print *message* in green, prefixed with a check-mark emoji."""
    text = f"✅ {message}"
    print_colored(Colors.GREEN, text)

def print_warning(message):
    """Print *message* in yellow, prefixed with a warning-sign emoji."""
    text = f"⚠️  {message}"
    print_colored(Colors.YELLOW, text)

def print_error(message):
    """Print *message* in red, prefixed with a cross-mark emoji."""
    text = f"❌ {message}"
    print_colored(Colors.RED, text)

def print_info(message):
    """Print *message* in blue, prefixed with an information emoji."""
    text = f"ℹ️  {message}"
    print_colored(Colors.BLUE, text)

def extract_pages_from_page_list(page_list_file):
    """Extract the expected page file names from the page-list document.

    Every ``<name>.html`` reference in the text is collected, regardless of
    the markup around it (numbered list item, inline code span, plain prose).
    Only the bare file name is kept: a name starts with a word character and
    may continue with word characters or hyphens, so ``user-profile.html`` is
    returned whole and a path prefix such as ``pages/`` is dropped.

    Args:
        page_list_file: Path of the UTF-8 encoded page-list file.

    Returns:
        A sorted list of unique file names, each ending in ``.html``.  The
        list is empty when the file does not exist or cannot be read; in both
        cases an error message is printed.
    """
    if not os.path.exists(page_list_file):
        print_error(f"页面清单文件不存在: {page_list_file}")
        return []

    # Only the read can fail; keep the try body limited to it.
    try:
        with open(page_list_file, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, UnicodeDecodeError) as e:
        print_error(f"读取页面清单文件失败: {e}")
        return []

    # A single file-name pattern is used on purpose.  Matching "N. <anything>.html"
    # or "`<anything>.html`" pulls surrounding markup and prose into the name
    # (e.g. "`login.html"), and a plain \w+ truncates hyphenated names
    # ("user-profile.html" -> "profile.html").  Because the list number can never
    # be part of a match here, purely numeric pages such as "404.html" are kept.
    page_names = re.findall(r'\w[\w-]*\.html', content)

    return sorted(set(page_names))

def check_html_completeness(html_file):
    """Inspect one HTML file and list everything that makes it look incomplete.

    The checks run in a fixed order: document skeleton (DOCTYPE, ``<html>``,
    ``<head>``, ``<body>``), presence of CSS or a CSS-omitted marker, minimum
    file size, and finally placeholder text that signals omitted code.

    Args:
        html_file: Path of the HTML file to inspect (read as UTF-8).

    Returns:
        A list of human-readable issue strings, empty when nothing was found.
        If anything raises while the file is being read or checked, a
        single-element list describing the error is returned instead.
    """
    try:
        with open(html_file, 'r', encoding='utf-8') as handle:
            html = handle.read()

        problems = []

        # Skeleton checks: (pattern, flags, message reported when absent).
        skeleton_checks = (
            (r'<!DOCTYPE\s+html>', re.IGNORECASE, "缺少 DOCTYPE 声明"),
            (r'<html[^>]*>', re.IGNORECASE, "缺少 <html> 标签"),
            (r'<head[^>]*>.*</head>', re.IGNORECASE | re.DOTALL,
             "缺少完整的 <head> 部分"),
            (r'<body[^>]*>.*</body>', re.IGNORECASE | re.DOTALL,
             "缺少完整的 <body> 部分"),
        )
        for regex, flags, complaint in skeleton_checks:
            if re.search(regex, html, flags) is None:
                problems.append(complaint)

        # CSS may be inline, linked, or replaced by an explicit "CSS omitted"
        # comment (the concatenated document strips styles); any one suffices.
        css_evidence = (
            (r'<style[^>]*>.*</style>', re.IGNORECASE | re.DOTALL),
            (r'<link[^>]*stylesheet[^>]*>', re.IGNORECASE),
            (r'<!-- CSS.*已省略', re.IGNORECASE),
        )
        if not any(re.search(regex, html, flags) for regex, flags in css_evidence):
            problems.append("缺少CSS样式或CSS省略标记")

        # Files under 1 KB are treated as suspiciously small.
        size_in_bytes = os.path.getsize(html_file)
        if size_in_bytes < 1024:
            problems.append(f"文件过小 ({size_in_bytes} bytes)，可能不完整")

        # Placeholder phrases that indicate part of the code was left out.
        omission_markers = (
            r'此处省略',
            r'代码较长.*省略',
            r'其余.*类似',
            r'\[注：.*省略.*\]',
            r'<!-- 省略 -->',
            r'省略其余',
        )
        problems.extend(
            f"发现省略标记: {marker}"
            for marker in omission_markers
            if re.search(marker, html, re.IGNORECASE)
        )

        return problems

    except Exception as e:
        return [f"无法读取文件: {e}"]

def validate_frontend_pages():
    """Validate the generated front-end pages and print a report.

    Paths are resolved relative to the directory containing this script.
    The function runs four steps, printing progress as it goes:

    1. Build the list of expected pages from the page-list document, falling
       back to the ``*.html`` files found in the front-end directory.
    2. Check that each expected page exists in the front-end directory.
    3. Run ``check_html_completeness`` on every page that exists.
    4. Inspect the concatenated source document, if present, for its size,
       the number of page separators and any omission markers.

    A summary with counts and a completion rate is printed at the end.

    Returns:
        The number of missing pages plus the number of incomplete pages.
        ``None`` is returned (implicitly) when the function stops early
        because the front-end directory does not exist.  Findings from step 4
        are only printed; they are not included in the returned count.
    """
    print_colored(Colors.PURPLE, "🔍 前端页面完整性验证")
    print_colored(Colors.BLUE, "=" * 60)

    # Resolve project paths relative to this script's directory
    base_dir = Path(__file__).parent
    page_list_file = base_dir / "process_docs" / "页面清单.md"
    front_dir = base_dir / "output_sourcecode" / "front"
    output_file = base_dir / "output_docs" / "前端源代码.txt"

    print_info(f"检查目录: {base_dir}")
    print_info(f"页面清单: {page_list_file}")
    print_info(f"前端目录: {front_dir}")
    print()

    # Step 1: extract the expected pages from the page list
    print_colored(Colors.CYAN, "📋 步骤1: 分析页面清单")
    expected_pages = extract_pages_from_page_list(page_list_file)

    if not expected_pages:
        # Fallback: treat whatever HTML files already exist as the expected set
        print_warning("无法从页面清单中提取页面信息，尝试扫描前端目录...")
        if front_dir.exists():
            expected_pages = [f.name for f in front_dir.glob("*.html")]
        else:
            print_error("前端目录不存在且无法提取页面清单")
            # Bare return -> None; main() maps any non-zero result to exit code 1
            return

    print_info(f"预期页面数量: {len(expected_pages)}")
    for page in expected_pages:
        print(f"  - {page}")
    print()

    # Step 2: check that every expected page file exists
    print_colored(Colors.CYAN, "📁 步骤2: 检查文件存在性")
    missing_pages = []
    existing_pages = []

    if not front_dir.exists():
        print_error(f"前端目录不存在: {front_dir}")
        # Bare return -> None, same as the early exit in step 1
        return

    for page in expected_pages:
        page_file = front_dir / page
        if page_file.exists():
            existing_pages.append(page)
            print_success(f"文件存在: {page}")
        else:
            missing_pages.append(page)
            print_error(f"文件缺失: {page}")

    print()

    # Step 3: check the completeness of each existing file
    print_colored(Colors.CYAN, "🔍 步骤3: 检查文件完整性")
    # List of (page name, list of issue strings) for pages with at least one issue
    incomplete_pages = []

    for page in existing_pages:
        page_file = front_dir / page
        issues = check_html_completeness(page_file)

        if issues:
            incomplete_pages.append((page, issues))
            print_warning(f"文件不完整: {page}")
            for issue in issues:
                print(f"    - {issue}")
        else:
            print_success(f"文件完整: {page}")

    print()

    # Step 4: check the concatenated source document
    print_colored(Colors.CYAN, "📄 步骤4: 检查汇总文档")

    if output_file.exists():
        try:
            with open(output_file, 'r', encoding='utf-8') as f:
                doc_content = f.read()

            # Format the on-disk size with a unit suited to its magnitude
            file_size = os.path.getsize(output_file)
            if file_size > 1024 * 1024:
                size_str = f"{file_size / (1024 * 1024):.2f} MB"
            elif file_size > 1024:
                size_str = f"{file_size / 1024:.2f} KB"
            else:
                size_str = f"{file_size} bytes"

            print_success(f"汇总文档存在: {output_file}")
            print_info(f"文档大小: {size_str}")

            # Count the pages in the document via "=== <name>.html ===" separators
            page_separators = re.findall(r'=== (.+?\.html) ===', doc_content)
            print_info(f"汇总文档包含页面: {len(page_separators)}")

            # Look for omission markers; only the first match is reported
            omission_found = re.search(r'此处省略|代码较长.*省略|其余.*类似|\[注：.*省略.*\]', doc_content, re.IGNORECASE)
            if omission_found:
                print_error(f"汇总文档包含省略标记: {omission_found.group()}")
            else:
                print_success("汇总文档无省略标记")

        except Exception as e:
            print_error(f"读取汇总文档失败: {e}")
    else:
        print_warning(f"汇总文档不存在: {output_file}")
        print_info("可以运行 python3 generate_frontend_sourcecode.py 生成")

    print()

    # Print the validation report summary
    print_colored(Colors.CYAN, "📊 验证报告汇总")
    print("=" * 60)

    total_pages = len(expected_pages)
    existing_count = len(existing_pages)
    # A page is complete when it exists and produced no issues in step 3
    complete_count = existing_count - len(incomplete_pages)

    print(f"📋 预期页面数量: {total_pages}")
    print(f"📁 已生成页面数量: {existing_count}")
    print(f"✅ 完整页面数量: {complete_count}")
    print(f"❌ 缺失页面数量: {len(missing_pages)}")
    print(f"⚠️  不完整页面数量: {len(incomplete_pages)}")

    # Completion rate is measured against the expected pages, not the existing ones
    if total_pages > 0:
        completion_rate = (complete_count / total_pages) * 100
        print(f"💯 完成率: {completion_rate:.1f}%")

    print()

    # Summary of the problems found
    if missing_pages or incomplete_pages:
        print_colored(Colors.RED, "🔧 需要修复的问题:")

        if missing_pages:
            print_error("缺失的页面:")
            for page in missing_pages:
                print(f"  - {page}")

        if incomplete_pages:
            print_warning("不完整的页面:")
            for page, issues in incomplete_pages:
                print(f"  - {page}:")
                for issue in issues:
                    print(f"    * {issue}")

        print()
        print_info("建议操作:")
        print("1. 检查页面清单是否正确定义了所有页面")
        print("2. 重新使用AI生成缺失或不完整的页面")
        print("3. 确保AI生成时遵循完整性要求")
        print("4. 运行 python3 generate_frontend_sourcecode.py 重新生成汇总文档")
    else:
        print_colored(Colors.GREEN, "🎉 所有页面验证通过！前端代码生成完整。")

    # Total problem count; main() turns 0 into exit code 0 and anything else into 1
    return len(missing_pages) + len(incomplete_pages)

def main():
    """Run the validation and translate its outcome into an exit status.

    Returns:
        0 when validation reports zero problems, 1 when it reports any other
        result, and 2 when validation raises an exception (the error is
        printed first).
    """
    try:
        problem_total = validate_frontend_pages()
    except Exception as e:
        print_error(f"验证过程中发生错误: {e}")
        return 2

    if problem_total == 0:
        return 0
    return 1

if __name__ == "__main__":
    # Use main()'s return value as the process exit status.  SystemExit is
    # raised directly instead of calling exit(): that name is injected by the
    # `site` module for interactive use and is missing when the interpreter
    # runs without it (e.g. `python -S`).
    raise SystemExit(main())