fix: optimize compliance scan performance and improve error handling

- Refactor scan_compliance to eliminate N+1 queries using joinedload and batch loading
- Add try-except wrapper in compliance scan API endpoint (pattern sketched below)
- Improve frontend axios error interceptor to display detail/message/timeout errors
- Update CORS config and nginx for domain deployment
hiderfong
2026-04-25 20:49:38 +08:00
parent 6d35cfa5b7
commit 34466a1ae9
10 changed files with 690 additions and 105 deletions
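The error-handling pattern from the first two diffs distills to: log the full traceback server-side, return a generic 500 to the client. A minimal sketch — the route path and run_scan are placeholders standing in for this project's endpoint and compliance_service.scan_compliance, not its actual names:

import logging

from fastapi import APIRouter, HTTPException, status

router = APIRouter()

def run_scan() -> list:
    # Placeholder for compliance_service.scan_compliance(db, project_id=...)
    raise RuntimeError("simulated failure")

@router.post("/scan")
def scan_endpoint():
    try:
        issues = run_scan()
        return {"issues_found": len(issues)}
    except Exception:
        # Full traceback goes to the server log only.
        logging.exception("Compliance scan failed")
        # The client sees a stable 500 with a safe, generic message.
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="扫描执行失败,请稍后重试",  # "Scan failed, please retry later"
        )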
+9 -2
@@ -1,5 +1,5 @@
from typing import Optional
from fastapi import APIRouter, Depends, Query
from fastapi import APIRouter, Depends, Query, HTTPException, status
from sqlalchemy.orm import Session
from app.core.database import get_db
@@ -26,8 +26,16 @@ def scan_compliance(
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user),
):
try:
issues = compliance_service.scan_compliance(db, project_id=project_id)
return ResponseModel(data={"issues_found": len(issues)})
except Exception:
import logging
logging.exception("Compliance scan failed")
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="扫描执行失败,请稍后重试"
)
@router.get("/issues")
@@ -67,6 +75,5 @@ def resolve_issue(
):
issue = compliance_service.resolve_issue(db, issue_id)
if not issue:
from fastapi import HTTPException, status
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="问题不存在")
return ResponseModel(message="已标记为已解决")
+9 -4
@@ -1,5 +1,5 @@
from typing import Optional
from fastapi import APIRouter, Depends, Query
from fastapi import APIRouter, Depends, Query, HTTPException, status
from sqlalchemy.orm import Session
from app.core.database import get_db
@@ -89,7 +89,6 @@ def delete_project(
):
p = project_service.get_project(db, project_id)
if not p:
from fastapi import HTTPException, status
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="项目不存在")
# Only admin or project creator can delete
if not _is_admin(current_user) and p.created_by != current_user.id:
@@ -110,9 +109,9 @@ def project_auto_classify(
project = project_service.get_project(db, project_id)
if not project:
from fastapi import HTTPException, status
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="项目不存在")
try:
if background:
# Check if already running
if project.celery_task_id:
@@ -136,6 +135,13 @@ def project_auto_classify(
project.status = "created"
db.commit()
return ResponseModel(data=result)
except Exception as e:
import logging
logging.exception("Auto classify failed")
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"自动分类执行失败: {str(e)}"
)
@router.get("/{project_id}/auto-classify-status")
@@ -149,7 +155,6 @@ def project_auto_classify_status(
project = project_service.get_project(db, project_id)
if not project:
from fastapi import HTTPException, status
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="项目不存在")
task_id = project.celery_task_id
+2 -11
@@ -39,19 +39,9 @@ async def log_requests(request: Request, call_next):
return response
from app.core.database import SessionLocal
db = None
try:
db = SessionLocal()
body_bytes = b""
if request.method in ["POST", "PUT", "PATCH"]:
try:
body_bytes = await request.body()
# Re-assign body for downstream
async def receive():
return {"type": "http.request", "body": body_bytes}
request._receive = receive
except Exception:
pass
log_entry = log_models.OperationLog(
module=request.url.path.split("/")[2] if len(request.url.path.split("/")) > 2 else "",
action=request.url.path,
@@ -66,6 +56,7 @@ async def log_requests(request: Request, call_next):
except Exception:
pass
finally:
if db:
db.close()
return response
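Background on the block deleted above: in Starlette, awaiting request.body() inside middleware drains the one-shot ASGI receive channel, so the downstream endpoint would see an empty body unless the stream is replayed. The removed code worked around that by overwriting a private attribute, essentially:

body_bytes = await request.body()  # consumes the ASGI stream

async def receive():
    # Replay the captured bytes for the downstream handler.
    return {"type": "http.request", "body": body_bytes}

request._receive = receive  # private Starlette attribute; brittle across versions

Dropping body capture removes that private-API dependency, plus a per-request buffering cost, from the logging middleware.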
+40 -22
@@ -1,10 +1,9 @@
from typing import List, Optional
from sqlalchemy.orm import Session
from datetime import datetime
from typing import List, Optional, Set, Tuple
from sqlalchemy.orm import Session, joinedload
from app.models.compliance import ComplianceRule, ComplianceIssue
from app.models.project import ClassificationProject, ClassificationResult
from app.models.classification import DataLevel
from app.models.project import ClassificationResult
from app.models.masking import MaskingRule
@@ -26,23 +25,47 @@ def init_builtin_rules(db: Session):
def scan_compliance(db: Session, project_id: Optional[int] = None) -> List[ComplianceIssue]:
"""Run compliance scan and generate issues."""
rules = db.query(ComplianceRule).filter(ComplianceRule.is_active == True).all()
issues = []
if not rules:
return []
# Get masking rules for check_masking logic
masking_rules = db.query(MaskingRule).filter(MaskingRule.is_active == True).all()
masking_level_ids = {r.level_id for r in masking_rules if r.level_id}
query = db.query(ClassificationProject)
# Build result filter and determine project ids
result_filter = [ClassificationResult.level_id.isnot(None)]
project_ids: List[int] = []
if project_id:
query = query.filter(ClassificationProject.id == project_id)
projects = query.all()
result_filter.append(ClassificationResult.project_id == project_id)
project_ids = [project_id]
else:
project_ids = [
r[0] for r in db.query(ClassificationResult.project_id).distinct().all()
]
if project_ids:
result_filter.append(ClassificationResult.project_id.in_(project_ids))
else:
return []
for project in projects:
results = db.query(ClassificationResult).filter(
ClassificationResult.project_id == project.id,
ClassificationResult.level_id.isnot(None),
# Pre-load all results with level and column to avoid N+1 queries
results = db.query(ClassificationResult).options(
joinedload(ClassificationResult.level),
joinedload(ClassificationResult.column),
).filter(*result_filter).all()
if not results:
return []
# Batch query existing open issues
existing_issues = db.query(ComplianceIssue).filter(
ComplianceIssue.project_id.in_(project_ids),
ComplianceIssue.status == "open",
).all()
existing_set: Set[Tuple[int, int, str, int]] = {
(i.rule_id, i.project_id, i.entity_type, i.entity_id) for i in existing_issues
}
issues = []
for r in results:
if not r.level:
continue
@@ -76,18 +99,11 @@ def scan_compliance(db: Session, project_id: Optional[int] = None) -> List[Compl
pass
if matched:
# Check if open issue already exists
existing = db.query(ComplianceIssue).filter(
ComplianceIssue.rule_id == rule.id,
ComplianceIssue.project_id == project.id,
ComplianceIssue.entity_type == "column",
ComplianceIssue.entity_id == (r.column_id or 0),
ComplianceIssue.status == "open",
).first()
if not existing:
key = (rule.id, r.project_id, "column", r.column_id or 0)
if key not in existing_set:
issue = ComplianceIssue(
rule_id=rule.id,
project_id=project.id,
project_id=r.project_id,
entity_type="column",
entity_id=r.column_id or 0,
entity_name=r.column.name if r.column else "未知",
@@ -97,7 +113,9 @@ def scan_compliance(db: Session, project_id: Optional[int] = None) -> List[Compl
)
db.add(issue)
issues.append(issue)
existing_set.add(key)
if issues:
db.commit()
return issues
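Net effect of the rewrite above, in query counts: the old loop ran one SELECT per project for its results, triggered a lazy-load SELECT for each result's .level and .column access, and issued one existence-check SELECT per matched row; the new code performs a handful of up-front queries and answers the existence check from an in-memory set. The joinedload half generalizes as below — a self-contained toy with fabricated Result/Level models on in-memory SQLite (echo=True prints each emitted statement, so the N+1 is directly observable):

from sqlalchemy import Column, ForeignKey, Integer, String, create_engine
from sqlalchemy.orm import declarative_base, joinedload, relationship, sessionmaker

Base = declarative_base()

class Level(Base):
    __tablename__ = "level"
    id = Column(Integer, primary_key=True)
    name = Column(String)

class Result(Base):
    __tablename__ = "result"
    id = Column(Integer, primary_key=True)
    level_id = Column(Integer, ForeignKey("level.id"))
    level = relationship("Level")

engine = create_engine("sqlite://", echo=True)  # echo prints every statement
Base.metadata.create_all(engine)
db = sessionmaker(bind=engine)()
db.add_all([Level(id=i, name=f"L{i}") for i in range(1, 4)])
db.add_all([Result(id=i, level_id=i) for i in range(1, 4)])
db.commit()

db.expunge_all()  # start cold so lazy loads are observable
# N+1 shape: one SELECT for the results, then one SELECT per r.level access
for r in db.query(Result).all():
    _ = r.level.name

db.expunge_all()
# Batched shape: a single SELECT with a LEFT OUTER JOIN loads both sides
for r in db.query(Result).options(joinedload(Result.level)).all():
    _ = r.level.name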
+542
@@ -0,0 +1,542 @@
"""
Generate test data for DataPointer system.
Targets: 10000+ records across all tables.
"""
import sys
sys.path.insert(0, '/app')
import random
import string
import json
from datetime import datetime, timedelta
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from app.core.database import Base
from app.models.user import User, Role, Dept, UserRole
from app.models.metadata import DataSource, Database, DataTable, DataColumn
from app.models.classification import Category, DataLevel, RecognitionRule, ClassificationTemplate
from app.models.project import ClassificationProject, ClassificationTask, ClassificationResult, ResultStatus
from app.models.log import OperationLog
from app.core.security import get_password_hash
# Database connection
DATABASE_URL = "postgresql+psycopg2://pdg:pdg_secret_2024@db:5432/prop_data_guard"
engine = create_engine(DATABASE_URL)
SessionLocal = sessionmaker(bind=engine)
db = SessionLocal()
# Clear existing test data (preserve admin user and built-in data)
print("Clearing existing test data...")
db.query(ClassificationResult).delete(synchronize_session=False)
db.query(ClassificationTask).delete(synchronize_session=False)
db.query(ClassificationProject).delete(synchronize_session=False)
db.query(DataColumn).delete(synchronize_session=False)
db.query(DataTable).delete(synchronize_session=False)
db.query(Database).delete(synchronize_session=False)
db.query(UserRole).filter(UserRole.user_id > 1).delete(synchronize_session=False)
db.query(User).filter(User.id > 1).delete(synchronize_session=False)
db.query(Dept).filter(Dept.id > 1).delete(synchronize_session=False)
db.query(OperationLog).delete(synchronize_session=False)
db.commit()
# Reset all sequences to avoid ID conflicts
from sqlalchemy import text
sequences = [
"sys_dept_id_seq", "sys_user_id_seq", "sys_user_role_id_seq",
"data_source_id_seq", "meta_database_id_seq", "meta_table_id_seq", "meta_column_id_seq",
"classification_project_id_seq", "classification_task_id_seq", "classification_result_id_seq",
"classification_change_id_seq", "sys_operation_log_id_seq",
]
for seq in sequences:
db.execute(text(f"ALTER SEQUENCE {seq} RESTART WITH 100"))
db.commit()
print(" Sequences reset")
random.seed(42)
# ============================================================
# 1. Departments
# ============================================================
print("Generating departments...")
root_dept_names = ["数据安全部", "合规管理部", "信息技术部"]
root_depts = []
for name in root_dept_names:
d = Dept(name=name, parent_id=None, sort_order=len(root_depts))
db.add(d)
root_depts.append(d)
db.commit()
for d in root_depts:
db.refresh(d)
# Map root depts by 1-based index: 1=数据安全部, 2=合规管理部, 3=信息技术部
root_id_map = {i+1: d.id for i, d in enumerate(root_depts)}
child_dept_defs = [
("业务一部", root_id_map[1]), ("业务二部", root_id_map[1]),
("车险事业部", root_id_map[3]), ("非车险事业部", root_id_map[3]), ("理赔服务部", root_id_map[3]),
("财务部", root_id_map[2]), ("精算部", root_id_map[2]),
("客户服务部", root_id_map[1]), ("渠道管理部", root_id_map[1]),
]
depts = root_depts[:]
for name, pid in child_dept_defs:
d = Dept(name=name, parent_id=pid, sort_order=len(depts))
db.add(d)
depts.append(d)
db.commit()
for d in depts[len(root_depts):]:
db.refresh(d)
print(f" Created {len(depts)} departments")
# ============================================================
# 2. Users
# ============================================================
print("Generating users...")
roles = db.query(Role).all()
role_map = {r.code: r.id for r in roles}
first_names = ["张", "王", "李", "赵", "刘", "陈", "杨", "黄", "周", "吴", "徐", "孙", "马", "朱", "胡", "郭", "何", "林", "高", "罗"]
last_names = ["伟", "芳", "娜", "敏", "静", "丽", "强", "磊", "军", "洋", "勇", "艳", "杰", "娟", "涛", "明", "超", "秀英", "霞", "平"]
def random_name():
return random.choice(first_names) + random.choice(last_names)
def random_phone():
return "1" + random.choice(["3","4","5","6","7","8","9"]) + "".join(random.choices(string.digits, k=9))
users = []
for i in range(120):
real = random_name()
username = f"user{i+2:03d}"
user = User(
username=username,
email=f"{username}@datapo.com",
hashed_password=get_password_hash("password123"),
real_name=real,
phone=random_phone(),
is_active=random.random() > 0.05,
is_superuser=False,
dept_id=random.choice(depts).id,
)
db.add(user)
users.append(user)
db.commit()
for u in users:
db.refresh(u)
# Assign roles
role_list = list(roles)
for u in users:
assigned_roles = random.sample(role_list, k=random.randint(1, 2))
for r in assigned_roles:
db.add(UserRole(user_id=u.id, role_id=r.id))
db.commit()
print(f" Created {len(users)} users")
# ============================================================
# 3. Data Sources
# ============================================================
print("Generating data sources...")
source_types = ["postgresql", "mysql", "oracle", "sqlserver", "dm"]
source_configs = [
("核心保单数据库", "postgresql", "db-core-prod", 5432, "core_policy"),
("理赔系统数据库", "mysql", "db-claim-prod", 3306, "claim_db"),
("财务数据仓库", "postgresql", "db-finance-dw", 5432, "finance_dw"),
("客户信息主库", "mysql", "db-cust-master", 3306, "customer_master"),
("渠道管理系统", "oracle", "db-channel-ora", 1521, "CHANNEL"),
("精算分析平台", "postgresql", "db-actuary-ana", 5432, "actuary_analytics"),
("监管报送库", "mysql", "db-regulatory", 3306, "regulatory_report"),
("车辆信息库", "postgresql", "db-vehicle", 5432, "vehicle_db"),
("非车险业务库", "sqlserver", "db-nonauto", 1433, "NonAutoDB"),
("历史归档库", "postgresql", "db-archive", 5432, "archive_db"),
("测试环境核心库", "postgresql", "db-core-test", 5432, "core_test"),
("达梦国产数据库", "dm", "db-dameng-prod", 5236, "DAMENG"),
]
sources = []
for name, stype, host, port, dbname in source_configs:
ds = DataSource(
name=name,
source_type=stype,
host=f"{host}.internal.company.com",
port=port,
database_name=dbname,
username=f"{stype}_admin",
encrypted_password=None,
status="active" if random.random() > 0.1 else "error",
dept_id=random.choice(depts).id,
created_by=random.choice(users).id,
)
db.add(ds)
sources.append(ds)
db.commit()
for s in sources:
db.refresh(s)
print(f" Created {len(sources)} data sources")
# ============================================================
# 4. Databases
# ============================================================
print("Generating databases...")
databases = []
for source in sources:
num_dbs = random.randint(1, 3)
for i in range(num_dbs):
d = Database(
source_id=source.id,
name=f"{source.database_name}_{i+1}" if num_dbs > 1 else source.database_name,
charset="UTF8" if source.source_type != "sqlserver" else "Chinese_PRC_CI_AS",
table_count=0,
)
db.add(d)
databases.append(d)
db.commit()
for d in databases:
db.refresh(d)
print(f" Created {len(databases)} databases")
# ============================================================
# 5. Data Tables & Columns (the big one)
# ============================================================
print("Generating tables and columns...")
table_prefixes = {
"policy": ["t_policy", "t_policy_detail", "t_policy_extension", "t_policy_history", "t_endorsement"],
"claim": ["t_claim", "t_claim_detail", "t_claim_payment", "t_claim_document", "t_survey"],
"customer": ["t_customer", "t_customer_contact", "t_customer_identity", "t_customer_vehicle", "t_customer_preference"],
"finance": ["t_payment", "t_receipt", "t_invoice", "t_commission", "t_reserve"],
"channel": ["t_agent", "t_agent_contract", "t_partner", "t_broker", "t_sales_record"],
"actuary": ["t_pricing_model", "t_risk_factor", "t_loss_ratio", "t_reserve_calc", "t_solvency"],
"regulatory": ["t_report_cbrc", "t_report_circ", "t_stat_premium", "t_stat_claim", "t_stat_channel"],
"vehicle": ["t_vehicle", "t_vehicle_model", "t_vehicle_usage", "t_vehicle_accident", "t_vehicle_maintenance"],
"system": ["t_user", "t_role", "t_permission", "t_log", "t_config", "t_dict"],
"archive": ["t_archive_policy", "t_archive_claim", "t_archive_customer", "t_archive_finance"],
}
column_templates = [
("id", "BIGINT", "主键ID", "system", 2),
("created_at", "TIMESTAMP", "创建时间", "system", 2),
("updated_at", "TIMESTAMP", "更新时间", "system", 2),
("is_deleted", "BOOLEAN", "是否删除", "system", 2),
("created_by", "BIGINT", "创建人", "system", 2),
("customer_name", "VARCHAR", "客户姓名", "customer", 4),
("customer_id_no", "VARCHAR", "客户身份证号", "customer", 4),
("mobile_phone", "VARCHAR", "手机号码", "customer", 4),
("email", "VARCHAR", "电子邮箱", "customer", 3),
("address", "VARCHAR", "联系地址", "customer", 3),
("bank_account", "VARCHAR", "银行账户", "finance", 4),
("bank_card_no", "VARCHAR", "银行卡号", "finance", 4),
("policy_no", "VARCHAR", "保单号", "policy", 3),
("policy_status", "VARCHAR", "保单状态", "policy", 2),
("premium_amount", "DECIMAL", "保费金额", "finance", 3),
("claim_no", "VARCHAR", "理赔号", "claim", 3),
("claim_amount", "DECIMAL", "理赔金额", "claim", 4),
("loss_description", "TEXT", "损失描述", "claim", 3),
("accident_location", "VARCHAR", "出险地点", "claim", 3),
("vehicle_plate", "VARCHAR", "车牌号", "vehicle", 3),
("vin_code", "VARCHAR", "车辆识别代码VIN", "vehicle", 4),
("agent_name", "VARCHAR", "代理人姓名", "channel", 3),
("agent_license", "VARCHAR", "代理人执业证号", "channel", 3),
("commission_rate", "DECIMAL", "佣金比例", "finance", 3),
("reserve_amount", "DECIMAL", "准备金金额", "finance", 5),
("solvency_ratio", "DECIMAL", "偿付能力充足率", "finance", 5),
("password_hash", "VARCHAR", "密码哈希", "system", 5),
("api_secret", "VARCHAR", "API密钥", "system", 5),
("session_token", "VARCHAR", "会话令牌", "system", 4),
("gps_location", "VARCHAR", "GPS定位信息", "vehicle", 4),
("driving_record", "TEXT", "行驶记录", "vehicle", 4),
("medical_record", "TEXT", "医疗记录", "claim", 4),
("income_info", "DECIMAL", "收入信息", "customer", 4),
("credit_score", "INT", "信用评分", "customer", 4),
("family_member", "VARCHAR", "家庭成员信息", "customer", 3),
("emergency_contact", "VARCHAR", "紧急联系人", "customer", 3),
("beneficiary_name", "VARCHAR", "受益人姓名", "policy", 4),
("beneficiary_id_no", "VARCHAR", "受益人身份证号", "policy", 4),
("underwriting_decision", "VARCHAR", "核保结论", "policy", 3),
("risk_score", "DECIMAL", "风险评分", "actuary", 3),
("fraud_flag", "BOOLEAN", "欺诈标记", "claim", 3),
("audit_comment", "TEXT", "审计意见", "system", 3),
("report_period", "VARCHAR", "报表期间", "regulatory", 2),
("regulatory_code", "VARCHAR", "监管编码", "regulatory", 2),
]
all_tables = []
all_columns = []
for database in databases:
prefix_key = "system"
for k in table_prefixes:
if k in database.name.lower() or k in database.source.name.lower():
prefix_key = k
break
prefix_list = table_prefixes.get(prefix_key, table_prefixes["system"])
num_tables = random.randint(25, 60)
for tidx in range(num_tables):
table_name = f"{random.choice(prefix_list)}_{tidx+1:03d}"
tbl = DataTable(
database_id=database.id,
name=table_name,
comment=f"{table_name}数据表",
row_count=random.randint(10000, 10000000),
column_count=0,
)
db.add(tbl)
all_tables.append(tbl)
db.commit()
for t in all_tables:
db.refresh(t)
print(f" Created {len(all_tables)} tables")
# Now generate columns
print(" Generating columns (this may take a moment)...")
levels = db.query(DataLevel).all()
level_map = {l.code: l.id for l in levels}
categories = db.query(Category).all()
cat_map = {}
for c in categories:
if c.code.startswith("CUST") and "customer" not in cat_map:
cat_map["customer"] = c.id
elif c.code.startswith("POLICY") and "policy" not in cat_map:
cat_map["policy"] = c.id
elif c.code.startswith("CLAIM") and "claim" not in cat_map:
cat_map["claim"] = c.id
elif c.code.startswith("FIN") and "finance" not in cat_map:
cat_map["finance"] = c.id
elif c.code.startswith("CHANNEL") and "channel" not in cat_map:
cat_map["channel"] = c.id
elif c.code.startswith("REG") and "regulatory" not in cat_map:
cat_map["regulatory"] = c.id
elif c.code.startswith("INT") and "system" not in cat_map:
cat_map["system"] = c.id
elif c.code.startswith("SUB") and "vehicle" not in cat_map:
cat_map["vehicle"] = c.id
sample_values = {
"customer_name": ["张三", "李四", "王五", "赵六", "钱七"],
"customer_id_no": ["110101199001011234", "310101198502023456", "440106197803034567"],
"mobile_phone": ["13800138000", "13900139000", "13700137000"],
"email": ["user1@example.com", "user2@test.com", "contact@company.com"],
"bank_card_no": ["6222021234567890123", "6228481234567890123"],
"vin_code": ["LSVAG2180E2100001", "LFV3A28K8A3000001"],
"vehicle_plate": ["京A12345", "沪B67890", "粤C11111"],
"policy_no": ["PICC2024000001", "PICC2024000002", "PICC2024000003"],
"claim_no": ["CLM2024000001", "CLM2024000002", "CLM2024000003"],
"address": ["北京市海淀区xxx路1号", "上海市浦东新区xxx路2号"],
}
batch_size = 500
column_batch = []
for tbl in all_tables:
num_cols = random.randint(15, 35)
selected_templates = random.sample(column_templates, k=min(num_cols, len(column_templates)))
for cidx, (col_name, col_type, comment, cat_hint, lvl_hint) in enumerate(selected_templates):
actual_name = col_name if cidx == 0 else f"{col_name}_{cidx}"
samples = None
if col_name in sample_values:
samples = json.dumps(random.sample(sample_values[col_name], k=min(3, len(sample_values[col_name]))), ensure_ascii=False)
col = DataColumn(
table_id=tbl.id,
name=actual_name,
data_type=col_type,
length=random.choice([20, 50, 100, 200, 500]) if "VARCHAR" in col_type else None,
comment=comment,
is_nullable=random.random() > 0.2,
sample_data=samples,
)
column_batch.append(col)
if len(column_batch) >= batch_size:
db.bulk_save_objects(column_batch)
db.commit()
all_columns.extend(column_batch)
column_batch = []
if column_batch:
db.bulk_save_objects(column_batch)
db.commit()
all_columns.extend(column_batch)
print(f" Created {len(all_columns)} columns")
# Update table counts
for tbl in all_tables:
tbl.column_count = db.query(DataColumn).filter(DataColumn.table_id == tbl.id).count()
db.add(tbl)
db.commit()
for database in databases:
database.table_count = db.query(DataTable).filter(DataTable.database_id == database.id).count()
db.add(database)
db.commit()
# ============================================================
# 6. Classification Projects
# ============================================================
print("Generating classification projects...")
templates = db.query(ClassificationTemplate).all()
projects = []
project_names = [
"2024年度数据分类分级专项",
"核心系统敏感数据梳理",
"新核心上线数据定级",
"客户个人信息保护专项",
"财务数据安全治理",
"理赔数据合规检查",
"渠道数据梳理项目",
"监管报送数据定级",
]
for i, name in enumerate(project_names):
p = ClassificationProject(
name=name,
template_id=random.choice(templates).id,
description=f"{name} - 数据分类分级治理项目",
status=random.choice(["created", "scanning", "labeling", "reviewing", "published"]),
target_source_ids=",".join(str(s.id) for s in random.sample(sources, k=random.randint(2, 5))),
planned_start=datetime.now() - timedelta(days=random.randint(10, 60)),
planned_end=datetime.now() + timedelta(days=random.randint(10, 90)),
created_by=random.choice(users).id,
)
db.add(p)
projects.append(p)
db.commit()
for p in projects:
db.refresh(p)
print(f" Created {len(projects)} projects")
# ============================================================
# 7. Classification Results (the critical mass)
# ============================================================
print("Generating classification results...")
# Re-fetch column IDs from DB since bulk_save_objects doesn't populate object IDs
col_rows = db.query(DataColumn.id).all()
all_col_ids = [c[0] for c in col_rows]
random.shuffle(all_col_ids)
result_batch = []
total_results_target = 20000
results_per_project = total_results_target // len(projects)
for proj in projects:
assigned_cols = random.sample(all_col_ids, k=min(results_per_project, len(all_col_ids)))
for col_id in assigned_cols:
source_type = random.choices(["auto", "manual"], weights=[0.7, 0.3])[0]
status_val = "auto" if source_type == "auto" else random.choice(["manual", "reviewed"])
cat = random.choice(categories)
lvl = random.choice(levels)
conf = round(random.uniform(0.3, 0.98), 2)
r = ClassificationResult(
project_id=proj.id,
column_id=col_id,
category_id=cat.id,
level_id=lvl.id,
source=source_type,
confidence=conf,
status=status_val,
labeler_id=random.choice(users).id if source_type == "manual" else None,
)
result_batch.append(r)
if len(result_batch) >= batch_size:
db.bulk_save_objects(result_batch)
db.commit()
result_batch = []
if result_batch:
db.bulk_save_objects(result_batch)
db.commit()
total_results = db.query(ClassificationResult).count()
print(f" Created {total_results} classification results")
# ============================================================
# 8. Classification Tasks
# ============================================================
print("Generating classification tasks...")
tasks = []
for proj in projects:
num_tasks = random.randint(2, 5)
for tidx in range(num_tasks):
task = ClassificationTask(
project_id=proj.id,
name=f"{proj.name}-任务{tidx+1}",
assigner_id=random.choice(users).id,
assignee_id=random.choice(users).id,
target_type="column",
status=random.choice(["pending", "in_progress", "completed"]),
deadline=datetime.now() + timedelta(days=random.randint(5, 30)),
)
db.add(task)
tasks.append(task)
db.commit()
print(f" Created {len(tasks)} tasks")
# ============================================================
# 9. Operation Logs
# ============================================================
print("Generating operation logs...")
log_actions = ["登录", "查询数据源", "创建项目", "自动分类", "人工打标", "导出报告", "修改规则", "删除任务"]
log_modules = ["auth", "datasource", "project", "classification", "task", "report", "rule", "system"]
log_batch = []
for i in range(8000):
log = OperationLog(
user_id=random.choice([None] + [u.id for u in users]),
username=random.choice(["admin"] + [u.username for u in users]),
module=random.choice(log_modules),
action=random.choice(log_actions),
method=random.choice(["GET", "POST", "PUT", "DELETE"]),
path=f"/api/v1/{random.choice(log_modules)}/{random.randint(1, 100)}",
ip=f"10.{random.randint(0,255)}.{random.randint(0,255)}.{random.randint(0,255)}",
status_code=random.choice([200, 200, 200, 201, 400, 401, 404, 500]),
duration_ms=random.randint(10, 2000),
created_at=datetime.now() - timedelta(days=random.randint(0, 30), hours=random.randint(0, 23)),
)
log_batch.append(log)
if len(log_batch) >= batch_size:
db.bulk_save_objects(log_batch)
db.commit()
log_batch = []
if log_batch:
db.bulk_save_objects(log_batch)
db.commit()
total_logs = db.query(OperationLog).count()
print(f" Created {total_logs} operation logs")
# ============================================================
# Summary
# ============================================================
print("\n" + "="*60)
print("Test data generation complete!")
print("="*60)
print(f" Departments: {db.query(Dept).count()}")
print(f" Users: {db.query(User).count()}")
print(f" Data Sources: {db.query(DataSource).count()}")
print(f" Databases: {db.query(Database).count()}")
print(f" Tables: {db.query(DataTable).count()}")
print(f" Columns: {db.query(DataColumn).count()}")
print(f" Categories: {db.query(Category).count()}")
print(f" Data Levels: {db.query(DataLevel).count()}")
print(f" Rules: {db.query(RecognitionRule).count()}")
print(f" Templates: {db.query(ClassificationTemplate).count()}")
print(f" Projects: {db.query(ClassificationProject).count()}")
print(f" Tasks: {db.query(ClassificationTask).count()}")
print(f" Results: {db.query(ClassificationResult).count()}")
print(f" Operation Logs: {db.query(OperationLog).count()}")
print("="*60)
db.close()
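Given the hard-coded DATABASE_URL and the sys.path.insert(0, '/app') at the top, the script is evidently meant to run inside the backend container — for example something like docker compose exec backend python generate_test_data.py, where the filename is a guess, since the commit view does not show the path. Note that it deletes existing projects, metadata, non-admin users, and logs before regenerating, so it is only safe against a disposable environment.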
+3
@@ -78,6 +78,9 @@ services:
container_name: pdg-frontend
ports:
- "80:80"
- "443:443"
volumes:
- ./ssl:/etc/nginx/ssl:ro
depends_on:
- backend
restart: unless-stopped
+14
@@ -1,9 +1,23 @@
# HTTP redirect to HTTPS
server {
listen 80;
server_name datapointer.cnroc.cn localhost _;
return 301 https://$host$request_uri;
}
server {
listen 443 ssl;
server_name datapointer.cnroc.cn localhost _;
root /usr/share/nginx/html;
index index.html;
# SSL certificates
ssl_certificate /etc/nginx/ssl/fullchain.pem;
ssl_certificate_key /etc/nginx/ssl/privkey.pem;
ssl_protocols TLSv1.2 TLSv1.3;
ssl_ciphers HIGH:!aNULL:!MD5;
ssl_prefer_server_ciphers on;
# Gzip compression
gzip on;
gzip_vary on;
+1 -1
@@ -35,7 +35,7 @@ export function deleteProject(id: number) {
}
export function autoClassifyProject(id: number, background: boolean = true) {
return request.post(`/projects/${id}/auto-classify`, null, { params: { background } })
return request.post(`/projects/${id}/auto-classify`, undefined, { params: { background } })
}
export function getAutoClassifyStatus(id: number) {
+5 -1
@@ -36,7 +36,11 @@ request.interceptors.response.use(
localStorage.removeItem('dp_refresh')
window.location.href = '/login'
} else {
ElMessage.error((error.response?.data as any)?.message || '网络错误')
const data = error.response?.data as any
const detail = Array.isArray(data?.detail)
? data.detail.map((d: any) => d.msg || JSON.stringify(d)).join(', ')
: data?.detail
ElMessage.error(detail || data?.message || error.message || '网络错误')
}
return Promise.reject(error)
}
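The Array.isArray branch exists because FastAPI reuses the detail key for two shapes: HTTPException carries a plain string, while request-validation failures carry a list of objects whose human-readable text sits under msg. A minimal self-contained sketch demonstrating both (endpoints are illustrative, not this project's routes):

from fastapi import FastAPI, HTTPException
from fastapi.testclient import TestClient
from pydantic import BaseModel

app = FastAPI()

class Item(BaseModel):
    name: str

@app.post("/items")
def create_item(item: Item):
    return item

@app.get("/boom")
def boom():
    raise HTTPException(status_code=404, detail="问题不存在")

client = TestClient(app)

# Validation failure: detail is a LIST of {"loc": ..., "msg": ..., "type": ...}
r = client.post("/items", json={})
print(r.status_code, r.json()["detail"][0]["msg"])  # 422, "Field required" (wording varies by Pydantic version)

# HTTPException: detail is a plain STRING
r = client.get("/boom")
print(r.status_code, r.json()["detail"])  # 404, 问题不存在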
+2 -1
@@ -220,7 +220,8 @@ async function handleAutoClassify(p: ProjectItem) {
}
fetchData()
} catch (e: any) {
ElMessage.error(e?.message || '自动分类失败')
const msg = e?.response?.data?.detail || e?.response?.data?.message || e?.message || '自动分类失败'
ElMessage.error(msg)
}
}