From 3ae151404b9abd9165b7d22a14776ed49e123ecb Mon Sep 17 00:00:00 2001 From: hiderfong Date: Sat, 25 Apr 2026 20:49:38 +0800 Subject: [PATCH] fix: optimize compliance scan performance and improve error handling - Refactor scan_compliance to eliminate N+1 queries using joinedload and batch loading - Add try-except wrapper in compliance scan API endpoint - Improve frontend axios error interceptor to display detail/message/timeout errors - Update CORS config and nginx for domain deployment --- backend/app/api/v1/compliance.py | 15 +- backend/app/main.py | 15 +- backend/app/services/compliance_service.py | 140 +++--- backend/scripts/generate_test_data_20k.py | 542 +++++++++++++++++++++ docker-compose.prod.yml | 3 + frontend/nginx.conf | 14 + frontend/src/api/request.ts | 6 +- 7 files changed, 657 insertions(+), 78 deletions(-) create mode 100644 backend/scripts/generate_test_data_20k.py diff --git a/backend/app/api/v1/compliance.py b/backend/app/api/v1/compliance.py index 743d98dc..81622c32 100644 --- a/backend/app/api/v1/compliance.py +++ b/backend/app/api/v1/compliance.py @@ -1,5 +1,5 @@ from typing import Optional -from fastapi import APIRouter, Depends, Query +from fastapi import APIRouter, Depends, Query, HTTPException, status from sqlalchemy.orm import Session from app.core.database import get_db @@ -26,8 +26,16 @@ def scan_compliance( db: Session = Depends(get_db), current_user: User = Depends(get_current_user), ): - issues = compliance_service.scan_compliance(db, project_id=project_id) - return ResponseModel(data={"issues_found": len(issues)}) + try: + issues = compliance_service.scan_compliance(db, project_id=project_id) + return ResponseModel(data={"issues_found": len(issues)}) + except Exception: + import logging + logging.exception("Compliance scan failed") + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="扫描执行失败,请稍后重试" + ) @router.get("/issues") @@ -67,6 +75,5 @@ def resolve_issue( ): issue = compliance_service.resolve_issue(db, issue_id) if not issue: - from fastapi import HTTPException, status raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="问题不存在") return ResponseModel(message="已标记为已解决") diff --git a/backend/app/main.py b/backend/app/main.py index d3ca25df..bc27c1c4 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -39,19 +39,9 @@ async def log_requests(request: Request, call_next): return response from app.core.database import SessionLocal + db = None try: db = SessionLocal() - body_bytes = b"" - if request.method in ["POST", "PUT", "PATCH"]: - try: - body_bytes = await request.body() - # Re-assign body for downstream - async def receive(): - return {"type": "http.request", "body": body_bytes} - request._receive = receive - except Exception: - pass - log_entry = log_models.OperationLog( module=request.url.path.split("/")[2] if len(request.url.path.split("/")) > 2 else "", action=request.url.path, @@ -66,7 +56,8 @@ async def log_requests(request: Request, call_next): except Exception: pass finally: - db.close() + if db: + db.close() return response diff --git a/backend/app/services/compliance_service.py b/backend/app/services/compliance_service.py index 0feead10..d11f5258 100644 --- a/backend/app/services/compliance_service.py +++ b/backend/app/services/compliance_service.py @@ -1,10 +1,9 @@ -from typing import List, Optional -from sqlalchemy.orm import Session from datetime import datetime +from typing import List, Optional, Set, Tuple +from sqlalchemy.orm import Session, joinedload from app.models.compliance import ComplianceRule, ComplianceIssue -from app.models.project import ClassificationProject, ClassificationResult -from app.models.classification import DataLevel +from app.models.project import ClassificationResult from app.models.masking import MaskingRule @@ -26,79 +25,98 @@ def init_builtin_rules(db: Session): def scan_compliance(db: Session, project_id: Optional[int] = None) -> List[ComplianceIssue]: """Run compliance scan and generate issues.""" rules = db.query(ComplianceRule).filter(ComplianceRule.is_active == True).all() - issues = [] + if not rules: + return [] # Get masking rules for check_masking logic masking_rules = db.query(MaskingRule).filter(MaskingRule.is_active == True).all() masking_level_ids = {r.level_id for r in masking_rules if r.level_id} - query = db.query(ClassificationProject) + # Build result filter and determine project ids + result_filter = [ClassificationResult.level_id.isnot(None)] + project_ids: List[int] = [] if project_id: - query = query.filter(ClassificationProject.id == project_id) - projects = query.all() + result_filter.append(ClassificationResult.project_id == project_id) + project_ids = [project_id] + else: + project_ids = [ + r[0] for r in db.query(ClassificationResult.project_id).distinct().all() + ] + if project_ids: + result_filter.append(ClassificationResult.project_id.in_(project_ids)) + else: + return [] - for project in projects: - results = db.query(ClassificationResult).filter( - ClassificationResult.project_id == project.id, - ClassificationResult.level_id.isnot(None), - ).all() + # Pre-load all results with level and column to avoid N+1 queries + results = db.query(ClassificationResult).options( + joinedload(ClassificationResult.level), + joinedload(ClassificationResult.column), + ).filter(*result_filter).all() - for r in results: - if not r.level: - continue - level_code = r.level.code + if not results: + return [] - for rule in rules: - matched = False - desc = "" - suggestion = "" + # Batch query existing open issues + existing_issues = db.query(ComplianceIssue).filter( + ComplianceIssue.project_id.in_(project_ids), + ComplianceIssue.status == "open", + ).all() + existing_set: Set[Tuple[int, int, str, int]] = { + (i.rule_id, i.project_id, i.entity_type, i.entity_id) for i in existing_issues + } - if rule.check_logic == "check_masking" and level_code in ("L4", "L5"): - if r.level_id not in masking_level_ids: - matched = True - desc = f"字段 '{r.column.name if r.column else '未知'}' 为 {level_code} 级,但未配置脱敏规则" - suggestion = "请在【数据脱敏】模块为该分级配置脱敏策略" + issues = [] + for r in results: + if not r.level: + continue + level_code = r.level.code - elif rule.check_logic == "check_encryption" and level_code == "L5": - # Placeholder: no encryption check in MVP, always flag + for rule in rules: + matched = False + desc = "" + suggestion = "" + + if rule.check_logic == "check_masking" and level_code in ("L4", "L5"): + if r.level_id not in masking_level_ids: matched = True - desc = f"字段 '{r.column.name if r.column else '未知'}' 为 L5 级核心数据,建议确认是否加密存储" - suggestion = "请确认该字段在数据库中已加密存储" + desc = f"字段 '{r.column.name if r.column else '未知'}' 为 {level_code} 级,但未配置脱敏规则" + suggestion = "请在【数据脱敏】模块为该分级配置脱敏策略" - elif rule.check_logic == "check_level" and level_code in ("L4", "L5"): - if r.source == "auto": - matched = True - desc = f"个人敏感字段 '{r.column.name if r.column else '未知'}' 目前为自动识别,建议人工复核并确认授权" - suggestion = "请人工确认该字段的处理已取得合法授权" + elif rule.check_logic == "check_encryption" and level_code == "L5": + # Placeholder: no encryption check in MVP, always flag + matched = True + desc = f"字段 '{r.column.name if r.column else '未知'}' 为 L5 级核心数据,建议确认是否加密存储" + suggestion = "请确认该字段在数据库中已加密存储" - elif rule.check_logic == "check_audit": - # Placeholder for cross-border check - pass + elif rule.check_logic == "check_level" and level_code in ("L4", "L5"): + if r.source == "auto": + matched = True + desc = f"个人敏感字段 '{r.column.name if r.column else '未知'}' 目前为自动识别,建议人工复核并确认授权" + suggestion = "请人工确认该字段的处理已取得合法授权" - if matched: - # Check if open issue already exists - existing = db.query(ComplianceIssue).filter( - ComplianceIssue.rule_id == rule.id, - ComplianceIssue.project_id == project.id, - ComplianceIssue.entity_type == "column", - ComplianceIssue.entity_id == (r.column_id or 0), - ComplianceIssue.status == "open", - ).first() - if not existing: - issue = ComplianceIssue( - rule_id=rule.id, - project_id=project.id, - entity_type="column", - entity_id=r.column_id or 0, - entity_name=r.column.name if r.column else "未知", - severity=rule.severity, - description=desc, - suggestion=suggestion, - ) - db.add(issue) - issues.append(issue) + elif rule.check_logic == "check_audit": + # Placeholder for cross-border check + pass - db.commit() + if matched: + key = (rule.id, r.project_id, "column", r.column_id or 0) + if key not in existing_set: + issue = ComplianceIssue( + rule_id=rule.id, + project_id=r.project_id, + entity_type="column", + entity_id=r.column_id or 0, + entity_name=r.column.name if r.column else "未知", + severity=rule.severity, + description=desc, + suggestion=suggestion, + ) + db.add(issue) + issues.append(issue) + existing_set.add(key) + + if issues: + db.commit() return issues diff --git a/backend/scripts/generate_test_data_20k.py b/backend/scripts/generate_test_data_20k.py new file mode 100644 index 00000000..b8e0bd09 --- /dev/null +++ b/backend/scripts/generate_test_data_20k.py @@ -0,0 +1,542 @@ +""" +Generate test data for DataPointer system. +Targets: 10000+ records across all tables. +""" +import sys +sys.path.insert(0, '/app') + +import random +import string +import json +from datetime import datetime, timedelta +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker + +from app.core.database import Base +from app.models.user import User, Role, Dept, UserRole +from app.models.metadata import DataSource, Database, DataTable, DataColumn +from app.models.classification import Category, DataLevel, RecognitionRule, ClassificationTemplate +from app.models.project import ClassificationProject, ClassificationTask, ClassificationResult, ResultStatus +from app.models.log import OperationLog +from app.core.security import get_password_hash + +# Database connection +DATABASE_URL = "postgresql+psycopg2://pdg:pdg_secret_2024@db:5432/prop_data_guard" +engine = create_engine(DATABASE_URL) +SessionLocal = sessionmaker(bind=engine) +db = SessionLocal() + +# Clear existing test data (preserve admin user and built-in data) +print("Clearing existing test data...") +db.query(ClassificationResult).delete(synchronize_session=False) +db.query(ClassificationTask).delete(synchronize_session=False) +db.query(ClassificationProject).delete(synchronize_session=False) +db.query(DataColumn).delete(synchronize_session=False) +db.query(DataTable).delete(synchronize_session=False) +db.query(Database).delete(synchronize_session=False) +db.query(UserRole).filter(UserRole.user_id > 1).delete(synchronize_session=False) +db.query(User).filter(User.id > 1).delete(synchronize_session=False) +db.query(Dept).filter(Dept.id > 1).delete(synchronize_session=False) +db.query(OperationLog).delete(synchronize_session=False) +db.commit() + +# Reset all sequences to avoid ID conflicts +from sqlalchemy import text +sequences = [ + "sys_dept_id_seq", "sys_user_id_seq", "sys_user_role_id_seq", + "data_source_id_seq", "meta_database_id_seq", "meta_table_id_seq", "meta_column_id_seq", + "classification_project_id_seq", "classification_task_id_seq", "classification_result_id_seq", + "classification_change_id_seq", "sys_operation_log_id_seq", +] +for seq in sequences: + db.execute(text(f"ALTER SEQUENCE {seq} RESTART WITH 100")) +db.commit() +print(" Sequences reset") + +random.seed(42) + +# ============================================================ +# 1. Departments +# ============================================================ +print("Generating departments...") +root_dept_names = ["数据安全部", "合规管理部", "信息技术部"] +root_depts = [] +for name in root_dept_names: + d = Dept(name=name, parent_id=None, sort_order=len(root_depts)) + db.add(d) + root_depts.append(d) +db.commit() +for d in root_depts: + db.refresh(d) + +# Map root depts by index: 0=数据安全部, 1=合规管理部, 2=信息技术部 +root_id_map = {i+1: d.id for i, d in enumerate(root_depts)} + +child_dept_defs = [ + ("业务一部", root_id_map[1]), ("业务二部", root_id_map[1]), + ("车险事业部", root_id_map[3]), ("非车险事业部", root_id_map[3]), ("理赔服务部", root_id_map[3]), + ("财务部", root_id_map[2]), ("精算部", root_id_map[2]), + ("客户服务部", root_id_map[1]), ("渠道管理部", root_id_map[1]), +] +depts = root_depts[:] +for name, pid in child_dept_defs: + d = Dept(name=name, parent_id=pid, sort_order=len(depts)) + db.add(d) + depts.append(d) +db.commit() +for d in depts[len(root_depts):]: + db.refresh(d) +print(f" Created {len(depts)} departments") + +# ============================================================ +# 2. Users +# ============================================================ +print("Generating users...") +roles = db.query(Role).all() +role_map = {r.code: r.id for r in roles} + +first_names = ["王", "李", "张", "刘", "陈", "杨", "赵", "黄", "周", "吴", "徐", "孙", "马", "朱", "胡", "郭", "林", "何", "高", "罗"] +last_names = ["伟", "芳", "娜", "敏", "静", "丽", "强", "磊", "军", "洋", "勇", "艳", "杰", "娟", "涛", "明", "超", "秀英", "华", "平"] + +def random_name(): + return random.choice(first_names) + random.choice(last_names) + +def random_phone(): + return "1" + random.choice(["3","4","5","6","7","8","9"]) + "".join(random.choices(string.digits, k=9)) + +users = [] +for i in range(120): + real = random_name() + username = f"user{i+2:03d}" + user = User( + username=username, + email=f"{username}@datapo.com", + hashed_password=get_password_hash("password123"), + real_name=real, + phone=random_phone(), + is_active=random.random() > 0.05, + is_superuser=False, + dept_id=random.choice(depts).id, + ) + db.add(user) + users.append(user) +db.commit() +for u in users: + db.refresh(u) + +# Assign roles +role_list = list(roles) +for u in users: + assigned_roles = random.sample(role_list, k=random.randint(1, 2)) + for r in assigned_roles: + db.add(UserRole(user_id=u.id, role_id=r.id)) +db.commit() +print(f" Created {len(users)} users") + +# ============================================================ +# 3. Data Sources +# ============================================================ +print("Generating data sources...") +source_types = ["postgresql", "mysql", "oracle", "sqlserver", "dm"] +source_configs = [ + ("核心保单数据库", "postgresql", "db-core-prod", 5432, "core_policy"), + ("理赔系统数据库", "mysql", "db-claim-prod", 3306, "claim_db"), + ("财务数据仓库", "postgresql", "db-finance-dw", 5432, "finance_dw"), + ("客户信息主库", "mysql", "db-cust-master", 3306, "customer_master"), + ("渠道管理系统", "oracle", "db-channel-ora", 1521, "CHANNEL"), + ("精算分析平台", "postgresql", "db-actuary-ana", 5432, "actuary_analytics"), + ("监管报送库", "mysql", "db-regulatory", 3306, "regulatory_report"), + ("车辆信息库", "postgresql", "db-vehicle", 5432, "vehicle_db"), + ("非车险业务库", "sqlserver", "db-nonauto", 1433, "NonAutoDB"), + ("历史归档库", "postgresql", "db-archive", 5432, "archive_db"), + ("测试环境核心库", "postgresql", "db-core-test", 5432, "core_test"), + ("达梦国产数据库", "dm", "db-dameng-prod", 5236, "DAMENG"), +] + +sources = [] +for name, stype, host, port, dbname in source_configs: + ds = DataSource( + name=name, + source_type=stype, + host=f"{host}.internal.company.com", + port=port, + database_name=dbname, + username=f"{stype}_admin", + encrypted_password=None, + status="active" if random.random() > 0.1 else "error", + dept_id=random.choice(depts).id, + created_by=random.choice(users).id, + ) + db.add(ds) + sources.append(ds) +db.commit() +for s in sources: + db.refresh(s) +print(f" Created {len(sources)} data sources") + +# ============================================================ +# 4. Databases +# ============================================================ +print("Generating databases...") +databases = [] +for source in sources: + num_dbs = random.randint(1, 3) + for i in range(num_dbs): + d = Database( + source_id=source.id, + name=f"{source.database_name}_{i+1}" if num_dbs > 1 else source.database_name, + charset="UTF8" if source.source_type != "sqlserver" else "Chinese_PRC_CI_AS", + table_count=0, + ) + db.add(d) + databases.append(d) +db.commit() +for d in databases: + db.refresh(d) +print(f" Created {len(databases)} databases") + +# ============================================================ +# 5. Data Tables & Columns (the big one) +# ============================================================ +print("Generating tables and columns...") + +table_prefixes = { + "policy": ["t_policy", "t_policy_detail", "t_policy_extension", "t_policy_history", "t_endorsement"], + "claim": ["t_claim", "t_claim_detail", "t_claim_payment", "t_claim_document", "t_survey"], + "customer": ["t_customer", "t_customer_contact", "t_customer_identity", "t_customer_vehicle", "t_customer_preference"], + "finance": ["t_payment", "t_receipt", "t_invoice", "t_commission", "t_reserve"], + "channel": ["t_agent", "t_agent_contract", "t_partner", "t_broker", "t_sales_record"], + "actuary": ["t_pricing_model", "t_risk_factor", "t_loss_ratio", "t_reserve_calc", "t_solvency"], + "regulatory": ["t_report_cbrc", "t_report_circ", "t_stat_premium", "t_stat_claim", "t_stat_channel"], + "vehicle": ["t_vehicle", "t_vehicle_model", "t_vehicle_usage", "t_vehicle_accident", "t_vehicle_maintenance"], + "system": ["t_user", "t_role", "t_permission", "t_log", "t_config", "t_dict"], + "archive": ["t_archive_policy", "t_archive_claim", "t_archive_customer", "t_archive_finance"], +} + +column_templates = [ + ("id", "BIGINT", "主键ID", "system", 2), + ("created_at", "TIMESTAMP", "创建时间", "system", 2), + ("updated_at", "TIMESTAMP", "更新时间", "system", 2), + ("is_deleted", "BOOLEAN", "是否删除", "system", 2), + ("created_by", "BIGINT", "创建人", "system", 2), + ("customer_name", "VARCHAR", "客户姓名", "customer", 4), + ("customer_id_no", "VARCHAR", "客户身份证号", "customer", 4), + ("mobile_phone", "VARCHAR", "手机号码", "customer", 4), + ("email", "VARCHAR", "电子邮箱", "customer", 3), + ("address", "VARCHAR", "联系地址", "customer", 3), + ("bank_account", "VARCHAR", "银行账户", "finance", 4), + ("bank_card_no", "VARCHAR", "银行卡号", "finance", 4), + ("policy_no", "VARCHAR", "保单号", "policy", 3), + ("policy_status", "VARCHAR", "保单状态", "policy", 2), + ("premium_amount", "DECIMAL", "保费金额", "finance", 3), + ("claim_no", "VARCHAR", "理赔号", "claim", 3), + ("claim_amount", "DECIMAL", "理赔金额", "claim", 4), + ("loss_description", "TEXT", "损失描述", "claim", 3), + ("accident_location", "VARCHAR", "出险地点", "claim", 3), + ("vehicle_plate", "VARCHAR", "车牌号", "vehicle", 3), + ("vin_code", "VARCHAR", "车辆识别代码VIN", "vehicle", 4), + ("agent_name", "VARCHAR", "代理人姓名", "channel", 3), + ("agent_license", "VARCHAR", "代理人执业证号", "channel", 3), + ("commission_rate", "DECIMAL", "佣金比例", "finance", 3), + ("reserve_amount", "DECIMAL", "准备金金额", "finance", 5), + ("solvency_ratio", "DECIMAL", "偿付能力充足率", "finance", 5), + ("password_hash", "VARCHAR", "密码哈希", "system", 5), + ("api_secret", "VARCHAR", "API密钥", "system", 5), + ("session_token", "VARCHAR", "会话令牌", "system", 4), + ("gps_location", "VARCHAR", "GPS定位信息", "vehicle", 4), + ("driving_record", "TEXT", "行驶记录", "vehicle", 4), + ("medical_record", "TEXT", "医疗记录", "claim", 4), + ("income_info", "DECIMAL", "收入信息", "customer", 4), + ("credit_score", "INT", "信用评分", "customer", 4), + ("family_member", "VARCHAR", "家庭成员信息", "customer", 3), + ("emergency_contact", "VARCHAR", "紧急联系人", "customer", 3), + ("beneficiary_name", "VARCHAR", "受益人姓名", "policy", 4), + ("beneficiary_id_no", "VARCHAR", "受益人身份证号", "policy", 4), + ("underwriting_decision", "VARCHAR", "核保结论", "policy", 3), + ("risk_score", "DECIMAL", "风险评分", "actuary", 3), + ("fraud_flag", "BOOLEAN", "欺诈标记", "claim", 3), + ("audit_comment", "TEXT", "审计意见", "system", 3), + ("report_period", "VARCHAR", "报表期间", "regulatory", 2), + ("regulatory_code", "VARCHAR", "监管编码", "regulatory", 2), +] + +all_tables = [] +all_columns = [] + +for database in databases: + prefix_key = "system" + for k in table_prefixes: + if k in database.name.lower() or k in database.source.name.lower(): + prefix_key = k + break + + prefix_list = table_prefixes.get(prefix_key, table_prefixes["system"]) + num_tables = random.randint(25, 60) + + for tidx in range(num_tables): + table_name = f"{random.choice(prefix_list)}_{tidx+1:03d}" + tbl = DataTable( + database_id=database.id, + name=table_name, + comment=f"{table_name}数据表", + row_count=random.randint(10000, 10000000), + column_count=0, + ) + db.add(tbl) + all_tables.append(tbl) + +db.commit() +for t in all_tables: + db.refresh(t) + +print(f" Created {len(all_tables)} tables") + +# Now generate columns +print(" Generating columns (this may take a moment)...") +levels = db.query(DataLevel).all() +level_map = {l.code: l.id for l in levels} + +categories = db.query(Category).all() +cat_map = {} +for c in categories: + if c.code.startswith("CUST") and "customer" not in cat_map: + cat_map["customer"] = c.id + elif c.code.startswith("POLICY") and "policy" not in cat_map: + cat_map["policy"] = c.id + elif c.code.startswith("CLAIM") and "claim" not in cat_map: + cat_map["claim"] = c.id + elif c.code.startswith("FIN") and "finance" not in cat_map: + cat_map["finance"] = c.id + elif c.code.startswith("CHANNEL") and "channel" not in cat_map: + cat_map["channel"] = c.id + elif c.code.startswith("REG") and "regulatory" not in cat_map: + cat_map["regulatory"] = c.id + elif c.code.startswith("INT") and "system" not in cat_map: + cat_map["system"] = c.id + elif c.code.startswith("SUB") and "vehicle" not in cat_map: + cat_map["vehicle"] = c.id + +sample_values = { + "customer_name": ["张三", "李四", "王五", "赵六", "钱七"], + "customer_id_no": ["110101199001011234", "310101198502023456", "440106197803034567"], + "mobile_phone": ["13800138000", "13900139000", "13700137000"], + "email": ["user1@example.com", "user2@test.com", "contact@company.com"], + "bank_card_no": ["6222021234567890123", "6228481234567890123"], + "vin_code": ["LSVAG2180E2100001", "LFV3A28K8A3000001"], + "vehicle_plate": ["京A12345", "沪B67890", "粤C11111"], + "policy_no": ["PICC2024000001", "PICC2024000002", "PICC2024000003"], + "claim_no": ["CLM2024000001", "CLM2024000002", "CLM2024000003"], + "address": ["北京市海淀区xxx路1号", "上海市浦东新区xxx路2号"], +} + +batch_size = 500 +column_batch = [] + +for tbl in all_tables: + num_cols = random.randint(15, 35) + selected_templates = random.sample(column_templates, k=min(num_cols, len(column_templates))) + + for cidx, (col_name, col_type, comment, cat_hint, lvl_hint) in enumerate(selected_templates): + actual_name = col_name if cidx == 0 else f"{col_name}_{cidx}" + samples = None + if col_name in sample_values: + samples = json.dumps(random.sample(sample_values[col_name], k=min(3, len(sample_values[col_name]))), ensure_ascii=False) + + col = DataColumn( + table_id=tbl.id, + name=actual_name, + data_type=col_type, + length=random.choice([20, 50, 100, 200, 500]) if "VARCHAR" in col_type else None, + comment=comment, + is_nullable=random.random() > 0.2, + sample_data=samples, + ) + column_batch.append(col) + + if len(column_batch) >= batch_size: + db.bulk_save_objects(column_batch) + db.commit() + all_columns.extend(column_batch) + column_batch = [] + +if column_batch: + db.bulk_save_objects(column_batch) + db.commit() + all_columns.extend(column_batch) + +print(f" Created {len(all_columns)} columns") + +# Update table counts +for tbl in all_tables: + tbl.column_count = db.query(DataColumn).filter(DataColumn.table_id == tbl.id).count() + db.add(tbl) +db.commit() + +for database in databases: + database.table_count = db.query(DataTable).filter(DataTable.database_id == database.id).count() + db.add(database) +db.commit() + +# ============================================================ +# 6. Classification Projects +# ============================================================ +print("Generating classification projects...") +templates = db.query(ClassificationTemplate).all() +projects = [] +project_names = [ + "2024年度数据分类分级专项", + "核心系统敏感数据梳理", + "新核心上线数据定级", + "客户个人信息保护专项", + "财务数据安全治理", + "理赔数据合规检查", + "渠道数据梳理项目", + "监管报送数据定级", +] + +for i, name in enumerate(project_names): + p = ClassificationProject( + name=name, + template_id=random.choice(templates).id, + description=f"{name} - 数据分类分级治理项目", + status=random.choice(["created", "scanning", "labeling", "reviewing", "published"]), + target_source_ids=",".join(str(s.id) for s in random.sample(sources, k=random.randint(2, 5))), + planned_start=datetime.now() - timedelta(days=random.randint(10, 60)), + planned_end=datetime.now() + timedelta(days=random.randint(10, 90)), + created_by=random.choice(users).id, + ) + db.add(p) + projects.append(p) +db.commit() +for p in projects: + db.refresh(p) +print(f" Created {len(projects)} projects") + +# ============================================================ +# 7. Classification Results (the critical mass) +# ============================================================ +print("Generating classification results...") + +# Re-fetch column IDs from DB since bulk_save_objects doesn't populate object IDs +col_rows = db.query(DataColumn.id).all() +all_col_ids = [c[0] for c in col_rows] +random.shuffle(all_col_ids) + +result_batch = [] +total_results_target = 20000 +results_per_project = total_results_target // len(projects) + +for proj in projects: + assigned_cols = random.sample(all_col_ids, k=min(results_per_project, len(all_col_ids))) + + for col_id in assigned_cols: + source_type = random.choices(["auto", "manual"], weights=[0.7, 0.3])[0] + status_val = "auto" if source_type == "auto" else random.choice(["manual", "reviewed"]) + + cat = random.choice(categories) + lvl = random.choice(levels) + conf = round(random.uniform(0.3, 0.98), 2) + + r = ClassificationResult( + project_id=proj.id, + column_id=col_id, + category_id=cat.id, + level_id=lvl.id, + source=source_type, + confidence=conf, + status=status_val, + labeler_id=random.choice(users).id if source_type == "manual" else None, + ) + result_batch.append(r) + + if len(result_batch) >= batch_size: + db.bulk_save_objects(result_batch) + db.commit() + result_batch = [] + +if result_batch: + db.bulk_save_objects(result_batch) + db.commit() + +total_results = db.query(ClassificationResult).count() +print(f" Created {total_results} classification results") + +# ============================================================ +# 8. Classification Tasks +# ============================================================ +print("Generating classification tasks...") +tasks = [] +for proj in projects: + num_tasks = random.randint(2, 5) + for tidx in range(num_tasks): + task = ClassificationTask( + project_id=proj.id, + name=f"{proj.name}-任务{tidx+1}", + assigner_id=random.choice(users).id, + assignee_id=random.choice(users).id, + target_type="column", + status=random.choice(["pending", "in_progress", "completed"]), + deadline=datetime.now() + timedelta(days=random.randint(5, 30)), + ) + db.add(task) + tasks.append(task) +db.commit() +print(f" Created {len(tasks)} tasks") + +# ============================================================ +# 9. Operation Logs +# ============================================================ +print("Generating operation logs...") +log_actions = ["登录", "查询数据源", "创建项目", "自动分类", "人工打标", "导出报告", "修改规则", "删除任务"] +log_modules = ["auth", "datasource", "project", "classification", "task", "report", "rule", "system"] + +log_batch = [] +for i in range(8000): + log = OperationLog( + user_id=random.choice([None] + [u.id for u in users]), + username=random.choice(["admin"] + [u.username for u in users]), + module=random.choice(log_modules), + action=random.choice(log_actions), + method=random.choice(["GET", "POST", "PUT", "DELETE"]), + path=f"/api/v1/{random.choice(log_modules)}/{random.randint(1, 100)}", + ip=f"10.{random.randint(0,255)}.{random.randint(0,255)}.{random.randint(0,255)}", + status_code=random.choice([200, 200, 200, 201, 400, 401, 404, 500]), + duration_ms=random.randint(10, 2000), + created_at=datetime.now() - timedelta(days=random.randint(0, 30), hours=random.randint(0, 23)), + ) + log_batch.append(log) + if len(log_batch) >= batch_size: + db.bulk_save_objects(log_batch) + db.commit() + log_batch = [] + +if log_batch: + db.bulk_save_objects(log_batch) + db.commit() + +total_logs = db.query(OperationLog).count() +print(f" Created {total_logs} operation logs") + +# ============================================================ +# Summary +# ============================================================ +print("\n" + "="*60) +print("Test data generation complete!") +print("="*60) +print(f" Departments: {db.query(Dept).count()}") +print(f" Users: {db.query(User).count()}") +print(f" Data Sources: {db.query(DataSource).count()}") +print(f" Databases: {db.query(Database).count()}") +print(f" Tables: {db.query(DataTable).count()}") +print(f" Columns: {db.query(DataColumn).count()}") +print(f" Categories: {db.query(Category).count()}") +print(f" Data Levels: {db.query(DataLevel).count()}") +print(f" Rules: {db.query(RecognitionRule).count()}") +print(f" Templates: {db.query(ClassificationTemplate).count()}") +print(f" Projects: {db.query(ClassificationProject).count()}") +print(f" Tasks: {db.query(ClassificationTask).count()}") +print(f" Results: {db.query(ClassificationResult).count()}") +print(f" Operation Logs: {db.query(OperationLog).count()}") +print("="*60) + +db.close() diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index d656063d..eb134899 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -78,6 +78,9 @@ services: container_name: pdg-frontend ports: - "80:80" + - "443:443" + volumes: + - ./ssl:/etc/nginx/ssl:ro depends_on: - backend restart: unless-stopped diff --git a/frontend/nginx.conf b/frontend/nginx.conf index a48c67d5..2a2f8c69 100644 --- a/frontend/nginx.conf +++ b/frontend/nginx.conf @@ -1,9 +1,23 @@ +# HTTP redirect to HTTPS server { listen 80; server_name datapointer.cnroc.cn localhost _; + return 301 https://$host$request_uri; +} + +server { + listen 443 ssl; + server_name datapointer.cnroc.cn localhost _; root /usr/share/nginx/html; index index.html; + # SSL certificates + ssl_certificate /etc/nginx/ssl/fullchain.pem; + ssl_certificate_key /etc/nginx/ssl/privkey.pem; + ssl_protocols TLSv1.2 TLSv1.3; + ssl_ciphers HIGH:!aNULL:!MD5; + ssl_prefer_server_ciphers on; + # Gzip compression gzip on; gzip_vary on; diff --git a/frontend/src/api/request.ts b/frontend/src/api/request.ts index afefc9a9..93e51c3d 100644 --- a/frontend/src/api/request.ts +++ b/frontend/src/api/request.ts @@ -36,7 +36,11 @@ request.interceptors.response.use( localStorage.removeItem('dp_refresh') window.location.href = '/login' } else { - ElMessage.error((error.response?.data as any)?.message || '网络错误') + const data = error.response?.data as any + const detail = Array.isArray(data?.detail) + ? data.detail.map((d: any) => d.msg || JSON.stringify(d)).join(', ') + : data?.detail + ElMessage.error(detail || data?.message || error.message || '网络错误') } return Promise.reject(error) }