feat: 全量功能模块开发与集成测试修复

- 新增后端模块:Alert、APIAsset、Compliance、Lineage、Masking、Risk、SchemaChange、Unstructured、Watermark
- 新增前端模块页面与API接口
- 新增Alembic迁移脚本(002-014)覆盖全量业务表
- 新增测试数据生成脚本与集成测试脚本
- 修复metadata模型JSON类型导入缺失导致启动失败的问题
- 修复前端Alert/APIAsset页面request模块路径错误
- 更新docker-compose与开发计划文档
This commit is contained in:
hiderfong
2026-04-25 08:51:38 +08:00
parent 8b2bc84399
commit 6d70520e79
110 changed files with 6125 additions and 87 deletions
+174
View File
@@ -0,0 +1,174 @@
import requests, json
from typing import Optional
from sqlalchemy.orm import Session
from app.models.api_asset import APIAsset, APIEndpoint
from app.models.metadata import DataColumn
from app.services.classification_engine import match_rule
# Substrings that mark an API field name as potentially sensitive.
# Order matters: _is_sensitive_field reports the first keyword that matches.
SENSITIVE_KEYWORDS = [
    # credentials
    "password",
    "pwd",
    "passwd",
    "secret",
    "token",
    # cards and bank accounts
    "credit_card",
    "card_no",
    "bank_account",
    "bank_card",
    # identity numbers
    "id_card",
    "id_number",
    # contact details
    "phone",
    "mobile",
    "email",
    "address",
    # personal profile
    "name",
    "age",
    "gender",
    "salary",
    "income",
    # health and biometrics
    "health",
    "medical",
    "biometric",
    "fingerprint",
    "face",
]
def _is_sensitive_field(name: str, schema: dict) -> tuple[bool, str]:
    """Decide whether a schema property looks sensitive.

    The lower-cased field name is compared against ``SENSITIVE_KEYWORDS`` by
    substring, and the first keyword found wins. If no keyword matches, the
    property's ``format`` and ``description`` are inspected for an e-mail
    hint, then for a UUID format on a field whose name contains "user".

    Args:
        name: Property name as it appears in the schema.
        schema: The property's schema object.

    Returns:
        ``(True, reason)`` where ``reason`` is ``"keyword:<kw>"``,
        ``"format:email"`` or ``"format:user-uuid"``; otherwise
        ``(False, "")``.

    NOTE(review): substring matching is deliberately broad but also noisy,
    e.g. "page" and "message" contain "age", and "interface" contains "face".
    """
    lowered = name.lower()

    hit = next((kw for kw in SENSITIVE_KEYWORDS if kw in lowered), None)
    if hit is not None:
        return True, f"keyword:{hit}"

    # No keyword in the name: fall back to description / format hints.
    description = str(schema.get("description", "")).lower()
    format_hint = str(schema.get("format", "")).lower()

    if "email" in format_hint or "email" in description:
        return True, "format:email"
    if "uuid" in format_hint and "user" in lowered:
        return True, "format:user-uuid"
    return False, ""
def _extract_fields(schema: dict, prefix: str = "") -> list[dict]:
    """Collect the sensitive fields declared by a JSON schema.

    Walks ``properties`` recursively. Nested objects extend the name with a
    dot (``user.email``); arrays add a ``[]`` suffix (``users[].email``, or
    ``[].email`` when the schema root itself is an array).

    Malformed parts of the schema (a non-dict schema, a non-dict
    ``properties`` value, a property whose schema is not an object) are
    skipped instead of raising, so one bad property cannot abort a scan.
    ``$ref`` pointers are not resolved here.

    Args:
        schema: Schema object to inspect; anything that is not a dict
            yields an empty result.
        prefix: Dotted path of the enclosing field, empty at the root.

    Returns:
        A list of ``{"name", "type", "reason"}`` dicts, one per sensitive
        field, in schema order (a field precedes its nested children).
    """
    if not isinstance(schema, dict):
        return []

    fields: list[dict] = []

    # A schema that is itself an array (typical for list responses) has no
    # "properties" of its own; the interesting fields live under "items".
    items = schema.get("items")
    if schema.get("type") == "array" and isinstance(items, dict):
        fields.extend(_extract_fields(items, f"{prefix}[]"))

    props = schema.get("properties")
    if not isinstance(props, dict):
        return fields

    for key, sub in props.items():
        if not isinstance(sub, dict):
            # e.g. boolean schemas or null placeholders: nothing to inspect.
            continue

        full_name = f"{prefix}.{key}" if prefix else key
        sensitive, reason = _is_sensitive_field(key, sub)
        if sensitive:
            fields.append(
                {
                    "name": full_name,
                    "type": sub.get("type", "unknown"),
                    "reason": reason,
                }
            )

        sub_type = sub.get("type")
        # nested object
        if sub_type == "object" and "properties" in sub:
            fields.extend(_extract_fields(sub, full_name))
        # array items
        if sub_type == "array" and isinstance(sub.get("items"), dict):
            fields.extend(_extract_fields(sub["items"], full_name + "[]"))

    return fields
def _risk_level_from_fields(fields: list[dict]) -> str:
    """Map a list of sensitive fields to an overall risk level.

    The result is the highest level triggered by any field, independent of
    field order:

    * ``"low"``      - no sensitive fields at all;
    * ``"critical"`` - some field name contains a credential / biometric
      keyword (password, secret, token, biometric);
    * ``"high"``     - otherwise, some field name contains a financial or
      other biometric keyword (credit_card, bank_account, fingerprint, face);
    * ``"medium"``   - sensitive fields exist but none of the above match.

    Args:
        fields: Field dicts carrying at least a ``"name"`` key.

    Returns:
        One of ``"low"``, ``"medium"``, ``"high"``, ``"critical"``.
    """
    if not fields:
        return "low"

    critical_keywords = ("password", "secret", "token", "biometric")
    high_keywords = ("credit_card", "bank_account", "fingerprint", "face")

    names = [field["name"].lower() for field in fields]

    # Check the most severe tier across *all* fields first, so that a
    # "high" field seen early cannot mask a "critical" one that follows.
    if any(kw in name for name in names for kw in critical_keywords):
        return "critical"
    if any(kw in name for name in names for kw in high_keywords):
        return "high"
    return "medium"
# HTTP verbs that denote operations inside an OpenAPI path item; other keys
# of a path item (e.g. "parameters", "summary") are not operations.
_HTTP_METHODS = frozenset(
    {"get", "post", "put", "patch", "delete", "head", "options"}
)


def _dig(node, *keys):
    """Follow *keys* through nested dicts; None if any level is not a dict."""
    for key in keys:
        if not isinstance(node, dict):
            return None
        node = node.get(key)
    return node


def _response_schema(operation: dict):
    """Return the schema of an operation's ``200`` response, or None."""
    ok = _dig(operation, "responses", "200")
    if not isinstance(ok, dict):
        return None
    # OpenAPI 3: responses.200.content.<media type>.schema (first one found).
    content = ok.get("content")
    if isinstance(content, dict):
        for media in content.values():
            if isinstance(media, dict) and "schema" in media:
                return media["schema"]
    # Swagger 2.0: the schema sits directly on the response object.
    return ok.get("schema")


def _operation_parameters(operation: dict) -> list[dict]:
    """Summarise an operation's parameters as name/in/required/type dicts."""
    summary = []
    for param in operation.get("parameters") or []:
        if not isinstance(param, dict):
            continue
        schema = param.get("schema")
        param_type = (
            schema.get("type", "string") if isinstance(schema, dict) else "string"
        )
        summary.append(
            {
                "name": param.get("name"),
                "in": param.get("in"),
                "required": param.get("required", False),
                "type": param_type,
            }
        )
    return summary


def scan_swagger(db: Session, asset_id: int) -> dict:
    """Fetch an asset's Swagger/OpenAPI document and rebuild its endpoints.

    The asset is marked ``"scanning"`` (committed immediately), the document
    at ``asset.swagger_url`` is downloaded with the asset's configured
    headers, all previously stored endpoints of the asset are replaced by the
    operations found under ``paths``, and the asset's counters and status are
    updated. For every operation the JSON request body schema and the ``200``
    response schema are searched for sensitive fields.

    On any error the transaction is rolled back, so the previously stored
    endpoints stay intact, and the asset is marked ``"failed"``.

    Args:
        db: Active SQLAlchemy session.
        asset_id: Primary key of the ``APIAsset`` to scan.

    Returns:
        ``{"success": True, "total": int, "sensitive": int}`` on success, or
        ``{"success": False, "error": str}`` if the asset is missing, has no
        ``swagger_url``, or the scan failed.

    NOTE(review): ``swagger_url`` is fetched exactly as configured; if assets
    can be created by untrusted users, confirm SSRF protections exist.
    """
    from datetime import datetime, timezone

    asset = db.query(APIAsset).filter(APIAsset.id == asset_id).first()
    if not asset:
        return {"success": False, "error": "Asset not found"}
    if not asset.swagger_url:
        return {"success": False, "error": "No swagger_url configured"}

    asset.scan_status = "scanning"
    db.commit()

    try:
        headers = dict(asset.headers or {})
        resp = requests.get(asset.swagger_url, headers=headers, timeout=30)
        resp.raise_for_status()
        spec = resp.json()
        if not isinstance(spec, dict):
            raise ValueError("Swagger document is not a JSON object")

        # Clear previous endpoints (undone by the rollback below on failure).
        db.query(APIEndpoint).filter(APIEndpoint.asset_id == asset_id).delete()

        total = 0
        sensitive_total = 0
        for path, path_item in (spec.get("paths") or {}).items():
            if not isinstance(path_item, dict):
                continue
            for method, detail in path_item.items():
                if method.lower() not in _HTTP_METHODS:
                    continue
                if not isinstance(detail, dict):
                    continue
                total += 1

                req_schema = _dig(
                    detail, "requestBody", "content", "application/json", "schema"
                )
                resp_schema = _response_schema(detail)

                fields = []
                if req_schema:
                    fields.extend(_extract_fields(req_schema))
                if resp_schema:
                    fields.extend(_extract_fields(resp_schema))

                # De-duplicate by field name, keeping the first occurrence.
                by_name: dict = {}
                for field in fields:
                    by_name.setdefault(field["name"], field)
                unique_fields = list(by_name.values())

                db.add(
                    APIEndpoint(
                        asset_id=asset_id,
                        method=method.upper(),
                        path=path,
                        summary=detail.get("summary", ""),
                        tags=detail.get("tags", []),
                        parameters=_operation_parameters(detail),
                        request_body_schema=req_schema,
                        response_schema=resp_schema,
                        sensitive_fields=unique_fields,
                        risk_level=_risk_level_from_fields(unique_fields),
                    )
                )
                if unique_fields:
                    sensitive_total += 1

        asset.scan_status = "completed"
        asset.total_endpoints = total
        asset.sensitive_endpoints = sensitive_total
        # Naive UTC timestamp, same value datetime.utcnow() used to produce.
        asset.updated_at = datetime.now(timezone.utc).replace(tzinfo=None)
        db.commit()
        return {"success": True, "total": total, "sensitive": sensitive_total}
    except Exception as e:
        # Discard the pending delete and any half-built endpoints first;
        # committing them here would wipe the results of the last good scan.
        db.rollback()
        asset.scan_status = "failed"
        db.commit()
        return {"success": False, "error": str(e)}
def create_asset(db: Session, data: dict, user_id: Optional[int] = None) -> APIAsset:
    """Persist a new API asset and return the refreshed row.

    Args:
        db: Active SQLAlchemy session.
        data: Asset attributes. ``name`` and ``base_url`` are required (a
            missing key raises ``KeyError``); ``swagger_url``, ``headers``
            and ``description`` default to ``None`` and ``auth_type`` to
            ``"none"``.
        user_id: Stored as ``created_by``; may be ``None``.

    Returns:
        The committed ``APIAsset`` instance, refreshed from the database.
    """
    attrs = {
        "name": data["name"],
        "base_url": data["base_url"],
        "swagger_url": data.get("swagger_url"),
        "auth_type": data.get("auth_type", "none"),
        "headers": data.get("headers"),
        "description": data.get("description"),
        "created_by": user_id,
    }
    new_asset = APIAsset(**attrs)

    db.add(new_asset)
    db.commit()
    db.refresh(new_asset)
    return new_asset
def update_asset(db: Session, asset_id: int, data: dict) -> Optional[APIAsset]:
    """Apply a partial update to an existing API asset.

    Every key in ``data`` that names an existing attribute of the asset is
    assigned; unknown keys are silently ignored.

    Args:
        db: Active SQLAlchemy session.
        asset_id: Primary key of the asset to update.
        data: Mapping of attribute name to new value.

    Returns:
        The committed and refreshed asset, or ``None`` if no asset has this
        id.

    NOTE(review): the ``hasattr`` check accepts any attribute, not only
    editable columns; callers should pass an already-filtered payload.
    """
    target = db.query(APIAsset).filter(APIAsset.id == asset_id).first()
    if not target:
        return None

    # Lazily filtered so each hasattr check runs right before its setattr.
    applicable = (
        (field, value) for field, value in data.items() if hasattr(target, field)
    )
    for field, value in applicable:
        setattr(target, field, value)

    db.commit()
    db.refresh(target)
    return target
def delete_asset(db: Session, asset_id: int) -> bool:
    """Delete an API asset by id.

    Args:
        db: Active SQLAlchemy session.
        asset_id: Primary key of the asset to remove.

    Returns:
        ``True`` if the asset existed and the deletion was committed,
        ``False`` if no asset has this id.
    """
    found = db.query(APIAsset).filter(APIAsset.id == asset_id).first()
    if found:
        db.delete(found)
        db.commit()
        return True
    return False