fix: optimize compliance scan performance and improve error handling

- Refactor scan_compliance to eliminate N+1 queries using joinedload and batch loading
- Add try-except wrapper in compliance scan API endpoint
- Improve frontend axios error interceptor to display detail/message/timeout errors
- Update CORS config and nginx for domain deployment
2026-04-25 20:49:38 +08:00
parent 6d35cfa5b7
commit 34466a1ae9
10 changed files with 690 additions and 105 deletions
@@ -1,10 +1,9 @@
-from typing import List, Optional
-from sqlalchemy.orm import Session
 from datetime import datetime
+from typing import List, Optional, Set, Tuple
+from sqlalchemy.orm import Session, joinedload

 from app.models.compliance import ComplianceRule, ComplianceIssue
-from app.models.project import ClassificationProject, ClassificationResult
-from app.models.classification import DataLevel
+from app.models.project import ClassificationResult
 from app.models.masking import MaskingRule


@@ -26,79 +25,98 @@ def init_builtin_rules(db: Session):
 def scan_compliance(db: Session, project_id: Optional[int] = None) -> List[ComplianceIssue]:
    """Run compliance scan and generate issues."""
    rules = db.query(ComplianceRule).filter(ComplianceRule.is_active == True).all()
-    issues = []
+    if not rules:
+        return []

    # Get masking rules for check_masking logic
    masking_rules = db.query(MaskingRule).filter(MaskingRule.is_active == True).all()
    masking_level_ids = {r.level_id for r in masking_rules if r.level_id}

-    query = db.query(ClassificationProject)
+    # Build result filter and determine project ids
+    result_filter = [ClassificationResult.level_id.isnot(None)]
+    project_ids: List[int] = []
    if project_id:
-        query = query.filter(ClassificationProject.id == project_id)
-    projects = query.all()
+        result_filter.append(ClassificationResult.project_id == project_id)
+        project_ids = [project_id]
+    else:
+        project_ids = [
+            r[0] for r in db.query(ClassificationResult.project_id).distinct().all()
+        ]
+        if project_ids:
+            result_filter.append(ClassificationResult.project_id.in_(project_ids))
+        else:
+            return []

-    for project in projects:
-        results = db.query(ClassificationResult).filter(
-            ClassificationResult.project_id == project.id,
-            ClassificationResult.level_id.isnot(None),
-        ).all()
+    # Pre-load all results with level and column to avoid N+1 queries
+    results = db.query(ClassificationResult).options(
+        joinedload(ClassificationResult.level),
+        joinedload(ClassificationResult.column),
+    ).filter(*result_filter).all()

-        for r in results:
-            if not r.level:
-                continue
-            level_code = r.level.code
+    if not results:
+        return []

-            for rule in rules:
-                matched = False
-                desc = ""
-                suggestion = ""
+    # Batch query existing open issues
+    existing_issues = db.query(ComplianceIssue).filter(
+        ComplianceIssue.project_id.in_(project_ids),
+        ComplianceIssue.status == "open",
+    ).all()
+    existing_set: Set[Tuple[int, int, str, int]] = {
+        (i.rule_id, i.project_id, i.entity_type, i.entity_id) for i in existing_issues
+    }

-                if rule.check_logic == "check_masking" and level_code in ("L4", "L5"):
-                    if r.level_id not in masking_level_ids:
-                        matched = True
-                        desc = f"字段 '{r.column.name if r.column else '未知'}' 为 {level_code} 级，但未配置脱敏规则"
-                        suggestion = "请在【数据脱敏】模块为该分级配置脱敏策略"
+    issues = []
+    for r in results:
+        if not r.level:
+            continue
+        level_code = r.level.code

-                elif rule.check_logic == "check_encryption" and level_code == "L5":
-                    # Placeholder: no encryption check in MVP, always flag
+        for rule in rules:
+            matched = False
+            desc = ""
+            suggestion = ""
+
+            if rule.check_logic == "check_masking" and level_code in ("L4", "L5"):
+                if r.level_id not in masking_level_ids:
                    matched = True
-                    desc = f"字段 '{r.column.name if r.column else '未知'}' 为 L5 级核心数据，建议确认是否加密存储"
-                    suggestion = "请确认该字段在数据库中已加密存储"
+                    desc = f"字段 '{r.column.name if r.column else '未知'}' 为 {level_code} 级，但未配置脱敏规则"
+                    suggestion = "请在【数据脱敏】模块为该分级配置脱敏策略"

-                elif rule.check_logic == "check_level" and level_code in ("L4", "L5"):
-                    if r.source == "auto":
-                        matched = True
-                        desc = f"个人敏感字段 '{r.column.name if r.column else '未知'}' 目前为自动识别，建议人工复核并确认授权"
-                        suggestion = "请人工确认该字段的处理已取得合法授权"
+            elif rule.check_logic == "check_encryption" and level_code == "L5":
+                # Placeholder: no encryption check in MVP, always flag
+                matched = True
+                desc = f"字段 '{r.column.name if r.column else '未知'}' 为 L5 级核心数据，建议确认是否加密存储"
+                suggestion = "请确认该字段在数据库中已加密存储"

-                elif rule.check_logic == "check_audit":
-                    # Placeholder for cross-border check
-                    pass
+            elif rule.check_logic == "check_level" and level_code in ("L4", "L5"):
+                if r.source == "auto":
+                    matched = True
+                    desc = f"个人敏感字段 '{r.column.name if r.column else '未知'}' 目前为自动识别，建议人工复核并确认授权"
+                    suggestion = "请人工确认该字段的处理已取得合法授权"

-                if matched:
-                    # Check if open issue already exists
-                    existing = db.query(ComplianceIssue).filter(
-                        ComplianceIssue.rule_id == rule.id,
-                        ComplianceIssue.project_id == project.id,
-                        ComplianceIssue.entity_type == "column",
-                        ComplianceIssue.entity_id == (r.column_id or 0),
-                        ComplianceIssue.status == "open",
-                    ).first()
-                    if not existing:
-                        issue = ComplianceIssue(
-                            rule_id=rule.id,
-                            project_id=project.id,
-                            entity_type="column",
-                            entity_id=r.column_id or 0,
-                            entity_name=r.column.name if r.column else "未知",
-                            severity=rule.severity,
-                            description=desc,
-                            suggestion=suggestion,
-                        )
-                        db.add(issue)
-                        issues.append(issue)
+            elif rule.check_logic == "check_audit":
+                # Placeholder for cross-border check
+                pass

-    db.commit()
+            if matched:
+                key = (rule.id, r.project_id, "column", r.column_id or 0)
+                if key not in existing_set:
+                    issue = ComplianceIssue(
+                        rule_id=rule.id,
+                        project_id=r.project_id,
+                        entity_type="column",
+                        entity_id=r.column_id or 0,
+                        entity_name=r.column.name if r.column else "未知",
+                        severity=rule.severity,
+                        description=desc,
+                        suggestion=suggestion,
+                    )
+                    db.add(issue)
+                    issues.append(issue)
+                    existing_set.add(key)
+
+    if issues:
+        db.commit()
    return issues