feat: 全量功能模块开发与集成测试修复
- 新增后端模块:Alert、APIAsset、Compliance、Lineage、Masking、Risk、SchemaChange、Unstructured、Watermark
- 新增前端模块页面与API接口
- 新增Alembic迁移脚本(002-014)覆盖全量业务表
- 新增测试数据生成脚本与集成测试脚本
- 修复metadata模型JSON类型导入缺失导致启动失败的问题
- 修复前端Alert/APIAsset页面request模块路径错误
- 更新docker-compose与开发计划文档
This commit is contained in:
@@ -0,0 +1,124 @@
|
||||
import sys, requests
|
||||
# Root URL of the locally running backend under test.
BASE = "http://localhost:8000"
# Versioned REST prefix shared by all business endpoints.
API = BASE + "/api/v1"
|
||||
# Accumulated outcomes: failure (name, detail) pairs and names of passed checks.
errors, passed = [], []


def check(name, ok, detail=""):
    """Record one test outcome and echo it to the console.

    Args:
        name: Short identifier of the check.
        ok: Truthy when the check succeeded.
        detail: Extra context, printed only for failures.
    """
    # One statement per line (PEP 8) instead of semicolon-joined pairs.
    if ok:
        passed.append(name)
        print(f" ✅ {name}")
    else:
        errors.append((name, detail))
        print(f" ❌ {name}: {detail}")
|
||||
|
||||
def get_items(resp):
    """Extract the list payload from a standard API response.

    Supports both envelope shapes: ``data`` as a bare list, or a
    paginated ``data`` dict carrying an ``items`` list. Anything else
    yields an empty list.
    """
    payload = resp.json().get("data", [])
    if isinstance(payload, dict):
        return payload.get("items", [])
    if isinstance(payload, list):
        return payload
    return []
|
||||
|
||||
def get_total(resp):
    """Return the top-level ``total`` count of a paginated response (0 if absent)."""
    body = resp.json()
    return body.get("total", 0)
|
||||
|
||||
# --- Sequential smoke tests against a live backend; every call feeds check(). ---
# NOTE(review): numeric thresholds below (>= 80 users, >= 800 tables, ...)
# presumably mirror the test-data generation scripts — verify when seeds change.

print("\n[1/15] Health")
r = requests.get(f"{BASE}/health")
check("health", r.status_code == 200 and r.json().get("status") == "ok")

print("\n[2/15] Auth")
r = requests.post(f"{API}/auth/login", json={"username": "admin", "password": "admin123"})
check("login", r.status_code == 200)
token = r.json().get("data", {}).get("access_token", "")
check("token", bool(token))
# Bearer token reused by every authenticated request below.
headers = {"Authorization": f"Bearer {token}"}

print("\n[3/15] User")
r = requests.get(f"{API}/users/me", headers=headers)
check("me", r.status_code == 200 and r.json()["data"]["username"] == "admin")
r = requests.get(f"{API}/users?page_size=100", headers=headers)
check("users", r.status_code == 200 and len(get_items(r)) >= 80, f"got {len(get_items(r))}")

print("\n[4/15] Depts")
r = requests.get(f"{API}/users/depts", headers=headers)
check("depts", r.status_code == 200 and len(r.json().get("data", [])) >= 12, f"got {len(r.json().get('data', []))}")

print("\n[5/15] DataSources")
r = requests.get(f"{API}/datasources", headers=headers)
check("datasources", r.status_code == 200 and len(get_items(r)) >= 12, f"got {len(get_items(r))}")

print("\n[6/15] Metadata")
r = requests.get(f"{API}/metadata/databases", headers=headers)
check("databases", r.status_code == 200 and len(get_items(r)) >= 31, f"got {len(get_items(r))}")
r = requests.get(f"{API}/metadata/tables", headers=headers)
check("tables", r.status_code == 200 and len(get_items(r)) >= 800, f"got {len(get_items(r))}")
r = requests.get(f"{API}/metadata/columns", headers=headers)
# Columns are checked via the paginated total, not the page contents.
check("columns", r.status_code == 200 and get_total(r) >= 10000, f"total={get_total(r)}")

print("\n[7/15] Classification")
r = requests.get(f"{API}/classifications/levels", headers=headers)
# Exactly five classification levels are expected.
check("levels", r.status_code == 200 and len(r.json().get("data", [])) == 5)
r = requests.get(f"{API}/classifications/categories", headers=headers)
check("categories", r.status_code == 200 and len(r.json().get("data", [])) >= 20, f"got {len(r.json().get('data', []))}")
r = requests.get(f"{API}/classifications/results", headers=headers)
check("results", r.status_code == 200 and get_total(r) >= 1000, f"total={get_total(r)}")

print("\n[8/15] Projects")
r = requests.get(f"{API}/projects", headers=headers)
check("projects", r.status_code == 200 and len(get_items(r)) >= 8, f"got {len(get_items(r))}")

print("\n[9/15] Tasks")
r = requests.get(f"{API}/tasks/my-tasks", headers=headers)
check("tasks", r.status_code == 200 and len(get_items(r)) >= 20, f"got {len(get_items(r))}")

print("\n[10/15] Dashboard")
r = requests.get(f"{API}/dashboard/stats", headers=headers)
check("stats", r.status_code == 200)
# Individual counters of the stats payload are validated one by one.
stats = r.json().get("data", {})
check("stats.data_sources", stats.get("data_sources", 0) >= 12, f"got {stats.get('data_sources')}")
check("stats.tables", stats.get("tables", 0) >= 800, f"got {stats.get('tables')}")
check("stats.columns", stats.get("columns", 0) >= 10000, f"got {stats.get('columns')}")
check("stats.labeled", stats.get("labeled", 0) >= 10000, f"got {stats.get('labeled')}")
r = requests.get(f"{API}/dashboard/distribution", headers=headers)
check("distribution", r.status_code == 200 and "level_distribution" in r.json().get("data", {}))

print("\n[11/15] Reports")
r = requests.get(f"{API}/reports/stats", headers=headers)
check("report stats", r.status_code == 200)

print("\n[12/15] Masking")
r = requests.get(f"{API}/masking/rules", headers=headers)
check("masking rules", r.status_code == 200)

print("\n[13/15] Watermark")
# POST with an explicit JSON content type; only reachability is asserted.
r = requests.post(f"{API}/watermark/trace", headers={**headers, "Content-Type": "application/json"}, json={"content": "test watermark"})
check("watermark trace", r.status_code == 200)

print("\n[14/15] Risk")
r = requests.get(f"{API}/risk/top", headers=headers)
check("risk top", r.status_code == 200)

print("\n[15/15] Compliance")
r = requests.get(f"{API}/compliance/issues", headers=headers)
check("compliance issues", r.status_code == 200)

# Additional modules
print("\n[Bonus] Additional modules")
r = requests.get(f"{API}/lineage/graph", headers=headers)
check("lineage graph", r.status_code == 200 and "nodes" in r.json().get("data", {}))
r = requests.get(f"{API}/alerts/records", headers=headers)
check("alert records", r.status_code == 200)
r = requests.get(f"{API}/schema-changes/logs", headers=headers)
check("schema changes logs", r.status_code == 200)
r = requests.get(f"{API}/unstructured/files", headers=headers)
check("unstructured files", r.status_code == 200)
r = requests.get(f"{API}/api-assets", headers=headers)
check("api assets", r.status_code == 200)

# Summary: replay failures and exit non-zero so CI can gate on this script.
print("\n" + "="*60)
print(f"Results: {len(passed)} passed, {len(errors)} failed")
print("="*60)
if errors:
    for n, d in errors: print(f" ❌ {n}: {d}")
    sys.exit(1)
else:
    print("🎉 All integration tests passed!")
    sys.exit(0)
|
||||
@@ -0,0 +1,59 @@
|
||||
"""
|
||||
Generate synthetic manual-labeled data for ML model training/demo.
|
||||
Run this script after metadata has been scanned so there are columns to label.
|
||||
"""
|
||||
import random
|
||||
import sys
|
||||
import os
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
||||
|
||||
from app.core.database import SessionLocal
|
||||
from app.models.metadata import DataColumn
|
||||
from app.models.classification import Category
|
||||
from app.models.project import ClassificationResult
|
||||
|
||||
|
||||
def main():
    """Seed reproducible 'manual' classification labels for demo/training."""
    db = SessionLocal()
    try:
        # Guard clauses: without scanned columns or leaf categories there is
        # nothing meaningful to label.
        columns = db.query(DataColumn).limit(300).all()
        if not columns:
            print("No columns found in database. Please scan a data source first.")
            return

        categories = db.query(Category).filter(Category.level == 2).all()
        if not categories:
            print("No sub-categories found.")
            return

        # Wipe earlier generated labels so re-running stays idempotent.
        db.query(ClassificationResult).filter(ClassificationResult.source == "manual").delete()
        db.commit()

        count = 0
        for col in columns:
            # Seeding the RNG with the column name makes every run assign
            # the same category to the same column.
            rng = random.Random(col.name)
            cat = rng.choice(categories)
            # NOTE(review): level_id falls back to the parent category's tree
            # depth — confirm this matches classification-level semantics.
            label = ClassificationResult(
                project_id=None,
                column_id=col.id,
                category_id=cat.id,
                level_id=cat.parent.level if cat.parent else 3,  # fallback
                source="manual",
                confidence=1.0,
                status="manual",
            )
            db.add(label)
            count += 1

        db.commit()
        print(f"Generated {count} manual labels across {len(categories)} categories.")
    finally:
        # Always release the session, even on unexpected errors.
        db.close()
|
||||
|
||||
|
||||
# Script entry point: run the seeding routine when executed directly.
if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,176 @@
|
||||
# One-off patch script: rewrite sync_metadata() inside the service module
# given as argv[1], replacing it with a checksum-aware incremental sync.
import sys

with open(sys.argv[1], 'r') as f:
    content = f.read()

# Anchor on the exact signature of the function to be replaced.
marker = 'def sync_metadata(db: Session, source_id: int, user_id: int) -> dict:'
idx = content.find(marker)
if idx == -1:
    print('Marker not found')
    sys.exit(1)

# Replacement source, written verbatim into the target file. It assumes the
# target module already imports Session, HTTPException, status, Database,
# DataTable, DataColumn, get_datasource and _decrypt_password.
# NOTE(review): "max_length" is not a key SQLAlchemy's get_columns() emits,
# so length will likely always be None — verify against the dialect used.
new_func = '''def _compute_checksum(data: dict) -> str:
    import hashlib, json
    payload = json.dumps(data, sort_keys=True, ensure_ascii=False, default=str)
    return hashlib.sha256(payload.encode()).hexdigest()[:32]


def sync_metadata(db: Session, source_id: int, user_id: int) -> dict:
    from sqlalchemy import create_engine, inspect, text
    import json
    from datetime import datetime

    source = get_datasource(db, source_id)
    if not source:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="数据源不存在")

    driver_map = {
        "mysql": "mysql+pymysql",
        "postgresql": "postgresql+psycopg2",
        "oracle": "oracle+cx_oracle",
        "sqlserver": "mssql+pymssql",
    }
    driver = driver_map.get(source.source_type, source.source_type)

    if source.source_type == "dm":
        return {"success": True, "message": "达梦数据库同步成功(模拟)", "databases": 0, "tables": 0, "columns": 0}

    password = ""
    if source.encrypted_password:
        try:
            password = _decrypt_password(source.encrypted_password)
        except Exception:
            pass

    try:
        url = f"{driver}://{source.username}:{password}@{source.host}:{source.port}/{source.database_name}"
        engine = create_engine(url, pool_pre_ping=True)
        inspector = inspect(engine)

        db_names = inspector.get_schema_names() or [source.database_name]
        scan_time = datetime.utcnow()
        total_tables = 0
        total_columns = 0
        updated_tables = 0
        updated_columns = 0

        for db_name in db_names:
            db_checksum = _compute_checksum({"name": db_name})
            db_obj = db.query(Database).filter(
                Database.source_id == source.id, Database.name == db_name
            ).first()
            if not db_obj:
                db_obj = Database(source_id=source.id, name=db_name, checksum=db_checksum, last_scanned_at=scan_time)
                db.add(db_obj)
            else:
                db_obj.checksum = db_checksum
                db_obj.last_scanned_at = scan_time
                db_obj.is_deleted = False
                db_obj.deleted_at = None

            table_names = inspector.get_table_names(schema=db_name)
            for tname in table_names:
                t_checksum = _compute_checksum({"name": tname})
                table_obj = db.query(DataTable).filter(
                    DataTable.database_id == db_obj.id, DataTable.name == tname
                ).first()
                if not table_obj:
                    table_obj = DataTable(database_id=db_obj.id, name=tname, checksum=t_checksum, last_scanned_at=scan_time)
                    db.add(table_obj)
                else:
                    if table_obj.checksum != t_checksum:
                        table_obj.checksum = t_checksum
                        updated_tables += 1
                    table_obj.last_scanned_at = scan_time
                    table_obj.is_deleted = False
                    table_obj.deleted_at = None

                columns = inspector.get_columns(tname, schema=db_name)
                for col in columns:
                    col_checksum = _compute_checksum({
                        "name": col["name"],
                        "type": str(col.get("type", "")),
                        "max_length": col.get("max_length"),
                        "comment": col.get("comment"),
                        "nullable": col.get("nullable", True),
                    })
                    col_obj = db.query(DataColumn).filter(
                        DataColumn.table_id == table_obj.id, DataColumn.name == col["name"]
                    ).first()
                    if not col_obj:
                        sample = None
                        try:
                            with engine.connect() as conn:
                                result = conn.execute(text(f'SELECT "{col["name"]}" FROM "{db_name}"."{tname}" LIMIT 5'))
                                samples = [str(r[0]) for r in result if r[0] is not None]
                                sample = json.dumps(samples, ensure_ascii=False)
                        except Exception:
                            pass
                        col_obj = DataColumn(
                            table_id=table_obj.id,
                            name=col["name"],
                            data_type=str(col.get("type", "")),
                            length=col.get("max_length"),
                            comment=col.get("comment"),
                            is_nullable=col.get("nullable", True),
                            sample_data=sample,
                            checksum=col_checksum,
                            last_scanned_at=scan_time,
                        )
                        db.add(col_obj)
                        total_columns += 1
                    else:
                        if col_obj.checksum != col_checksum:
                            col_obj.checksum = col_checksum
                            col_obj.data_type = str(col.get("type", ""))
                            col_obj.length = col.get("max_length")
                            col_obj.comment = col.get("comment")
                            col_obj.is_nullable = col.get("nullable", True)
                            updated_columns += 1
                        col_obj.last_scanned_at = scan_time
                        col_obj.is_deleted = False
                        col_obj.deleted_at = None

                total_tables += 1

        # Soft-delete objects not seen in this scan
        db.query(Database).filter(
            Database.source_id == source.id,
            Database.last_scanned_at < scan_time,
        ).update({"is_deleted": True, "deleted_at": scan_time}, synchronize_session=False)

        for db_obj in db.query(Database).filter(Database.source_id == source.id).all():
            db.query(DataTable).filter(
                DataTable.database_id == db_obj.id,
                DataTable.last_scanned_at < scan_time,
            ).update({"is_deleted": True, "deleted_at": scan_time}, synchronize_session=False)
            for table_obj in db.query(DataTable).filter(DataTable.database_id == db_obj.id).all():
                db.query(DataColumn).filter(
                    DataColumn.table_id == table_obj.id,
                    DataColumn.last_scanned_at < scan_time,
                ).update({"is_deleted": True, "deleted_at": scan_time}, synchronize_session=False)

        source.status = "active"
        db.commit()

        return {
            "success": True,
            "message": "元数据同步成功",
            "databases": len(db_names),
            "tables": total_tables,
            "columns": total_columns,
            "updated_tables": updated_tables,
            "updated_columns": updated_columns,
        }
    except Exception as e:
        source.status = "error"
        db.commit()
        return {"success": False, "message": f"同步失败: {str(e)}", "databases": 0, "tables": 0, "columns": 0}
'''

# NOTE(review): everything from the marker to the end of the original file is
# replaced — this silently drops any code that followed sync_metadata();
# confirm it is the last definition in the target module.
new_content = content[:idx] + new_func

with open(sys.argv[1], 'w') as f:
    f.write(new_content)

print('Patched successfully')
|
||||
Reference in New Issue
Block a user