feat: 全量功能模块开发与集成测试修复
- 新增后端模块:Alert、APIAsset、Compliance、Lineage、Masking、Risk、SchemaChange、Unstructured、Watermark
- 新增前端模块页面与API接口
- 新增Alembic迁移脚本(002-014)覆盖全量业务表
- 新增测试数据生成脚本与集成测试脚本
- 修复metadata模型JSON类型导入缺失导致启动失败的问题
- 修复前端Alert/APIAsset页面request模块路径错误
- 更新docker-compose与开发计划文档
This commit is contained in:
@@ -3,9 +3,23 @@ from sqlalchemy.orm import Session
|
||||
from fastapi import HTTPException, status
|
||||
|
||||
from app.models.metadata import DataSource, Database, DataTable, DataColumn
|
||||
from app.models.schema_change import SchemaChangeLog
|
||||
from app.services.datasource_service import get_datasource, _decrypt_password
|
||||
|
||||
|
||||
def _log_schema_change(
    db: Session,
    source_id: int,
    change_type: str,
    database_id: Optional[int] = None,
    table_id: Optional[int] = None,
    column_id: Optional[int] = None,
    old_value: Optional[str] = None,
    new_value: Optional[str] = None,
) -> None:
    """Record one schema-change event (e.g. add_table, drop_column) for auditing.

    The log row is only added to the session, NOT committed — callers batch
    several log entries into the surrounding sync transaction and commit once.

    Args:
        db: active SQLAlchemy session.
        source_id: id of the data source the change belongs to.
        change_type: change kind string (e.g. "add_table", "change_type").
        database_id / table_id / column_id: ids of the affected objects,
            whichever apply to this change kind.
        old_value / new_value: human-readable before/after descriptions.
    """
    log = SchemaChangeLog(
        source_id=source_id,
        database_id=database_id,
        table_id=table_id,
        column_id=column_id,
        change_type=change_type,
        old_value=old_value,
        new_value=new_value,
    )
    db.add(log)
|
||||
|
||||
|
||||
def get_database(db: Session, db_id: int) -> Optional[Database]:
    """Fetch a single Database row by primary key, or None if absent."""
    query = db.query(Database).filter(Database.id == db_id)
    return query.first()
|
||||
|
||||
@@ -19,14 +33,14 @@ def get_column(db: Session, column_id: int) -> Optional[DataColumn]:
|
||||
|
||||
|
||||
def list_databases(db: Session, source_id: Optional[int] = None) -> List[Database]:
    """Return all non-soft-deleted Database rows, optionally scoped to one source.

    Args:
        db: active SQLAlchemy session.
        source_id: when given, restrict results to that data source.

    Returns:
        List of matching Database rows (soft-deleted rows are excluded).
    """
    # `== False` is the SQLAlchemy column-expression form, not a Python
    # comparison, so it must not be rewritten as `not ...`.  # noqa: E712
    query = db.query(Database).filter(Database.is_deleted == False)  # noqa: E712
    # Explicit None check (not truthiness) so an id of 0 is not silently ignored.
    if source_id is not None:
        query = query.filter(Database.source_id == source_id)
    return query.all()
|
||||
|
||||
|
||||
def list_tables(db: Session, database_id: Optional[int] = None, keyword: Optional[str] = None) -> Tuple[List[DataTable], int]:
|
||||
query = db.query(DataTable)
|
||||
query = db.query(DataTable).filter(DataTable.is_deleted == False)
|
||||
if database_id:
|
||||
query = query.filter(DataTable.database_id == database_id)
|
||||
if keyword:
|
||||
@@ -37,7 +51,7 @@ def list_tables(db: Session, database_id: Optional[int] = None, keyword: Optiona
|
||||
|
||||
|
||||
def list_columns(db: Session, table_id: Optional[int] = None, keyword: Optional[str] = None, page: int = 1, page_size: int = 50) -> Tuple[List[DataColumn], int]:
|
||||
query = db.query(DataColumn)
|
||||
query = db.query(DataColumn).filter(DataColumn.is_deleted == False)
|
||||
if table_id:
|
||||
query = query.filter(DataColumn.table_id == table_id)
|
||||
if keyword:
|
||||
@@ -49,7 +63,7 @@ def list_columns(db: Session, table_id: Optional[int] = None, keyword: Optional[
|
||||
return items, total
|
||||
|
||||
|
||||
def build_tree(db: Session, source_id: Optional[int] = None) -> List[dict]:
|
||||
def build_tree(db: Session, source_id: Optional[int] = None, include_deleted: bool = False) -> List[dict]:
|
||||
sources = db.query(DataSource)
|
||||
if source_id:
|
||||
sources = sources.filter(DataSource.id == source_id)
|
||||
@@ -65,20 +79,24 @@ def build_tree(db: Session, source_id: Optional[int] = None) -> List[dict]:
|
||||
"meta": {"source_type": s.source_type, "status": s.status},
|
||||
}
|
||||
for d in s.databases:
|
||||
if not include_deleted and d.is_deleted:
|
||||
continue
|
||||
db_node = {
|
||||
"id": d.id,
|
||||
"name": d.name,
|
||||
"type": "database",
|
||||
"children": [],
|
||||
"meta": {"charset": d.charset, "table_count": d.table_count},
|
||||
"meta": {"charset": d.charset, "table_count": d.table_count, "is_deleted": d.is_deleted},
|
||||
}
|
||||
for t in d.tables:
|
||||
if not include_deleted and t.is_deleted:
|
||||
continue
|
||||
table_node = {
|
||||
"id": t.id,
|
||||
"name": t.name,
|
||||
"type": "table",
|
||||
"children": [],
|
||||
"meta": {"comment": t.comment, "row_count": t.row_count, "column_count": t.column_count},
|
||||
"meta": {"comment": t.comment, "row_count": t.row_count, "column_count": t.column_count, "is_deleted": t.is_deleted},
|
||||
}
|
||||
db_node["children"].append(table_node)
|
||||
source_node["children"].append(db_node)
|
||||
@@ -86,9 +104,16 @@ def build_tree(db: Session, source_id: Optional[int] = None) -> List[dict]:
|
||||
return result
|
||||
|
||||
|
||||
def _compute_checksum(data: dict) -> str:
|
||||
import hashlib, json
|
||||
payload = json.dumps(data, sort_keys=True, ensure_ascii=False, default=str)
|
||||
return hashlib.sha256(payload.encode()).hexdigest()[:32]
|
||||
|
||||
|
||||
def sync_metadata(db: Session, source_id: int, user_id: int) -> dict:
|
||||
from sqlalchemy import create_engine, inspect, text
|
||||
import json
|
||||
from datetime import datetime
|
||||
|
||||
source = get_datasource(db, source_id)
|
||||
if not source:
|
||||
@@ -118,29 +143,56 @@ def sync_metadata(db: Session, source_id: int, user_id: int) -> dict:
|
||||
inspector = inspect(engine)
|
||||
|
||||
db_names = inspector.get_schema_names() or [source.database_name]
|
||||
scan_time = datetime.utcnow()
|
||||
total_tables = 0
|
||||
total_columns = 0
|
||||
updated_tables = 0
|
||||
updated_columns = 0
|
||||
|
||||
for db_name in db_names:
|
||||
db_obj = db.query(Database).filter(Database.source_id == source.id, Database.name == db_name).first()
|
||||
db_checksum = _compute_checksum({"name": db_name})
|
||||
db_obj = db.query(Database).filter(
|
||||
Database.source_id == source.id, Database.name == db_name
|
||||
).first()
|
||||
if not db_obj:
|
||||
db_obj = Database(source_id=source.id, name=db_name)
|
||||
db_obj = Database(source_id=source.id, name=db_name, checksum=db_checksum, last_scanned_at=scan_time)
|
||||
db.add(db_obj)
|
||||
db.commit()
|
||||
db.refresh(db_obj)
|
||||
else:
|
||||
db_obj.checksum = db_checksum
|
||||
db_obj.last_scanned_at = scan_time
|
||||
db_obj.is_deleted = False
|
||||
db_obj.deleted_at = None
|
||||
|
||||
table_names = inspector.get_table_names(schema=db_name)
|
||||
for tname in table_names:
|
||||
table_obj = db.query(DataTable).filter(DataTable.database_id == db_obj.id, DataTable.name == tname).first()
|
||||
t_checksum = _compute_checksum({"name": tname})
|
||||
table_obj = db.query(DataTable).filter(
|
||||
DataTable.database_id == db_obj.id, DataTable.name == tname
|
||||
).first()
|
||||
if not table_obj:
|
||||
table_obj = DataTable(database_id=db_obj.id, name=tname)
|
||||
table_obj = DataTable(database_id=db_obj.id, name=tname, checksum=t_checksum, last_scanned_at=scan_time)
|
||||
db.add(table_obj)
|
||||
db.commit()
|
||||
db.refresh(table_obj)
|
||||
_log_schema_change(db, source.id, "add_table", database_id=db_obj.id, table_id=table_obj.id, new_value=tname)
|
||||
else:
|
||||
if table_obj.checksum != t_checksum:
|
||||
table_obj.checksum = t_checksum
|
||||
updated_tables += 1
|
||||
table_obj.last_scanned_at = scan_time
|
||||
table_obj.is_deleted = False
|
||||
table_obj.deleted_at = None
|
||||
|
||||
columns = inspector.get_columns(tname, schema=db_name)
|
||||
for col in columns:
|
||||
col_obj = db.query(DataColumn).filter(DataColumn.table_id == table_obj.id, DataColumn.name == col["name"]).first()
|
||||
col_checksum = _compute_checksum({
|
||||
"name": col["name"],
|
||||
"type": str(col.get("type", "")),
|
||||
"max_length": col.get("max_length"),
|
||||
"comment": col.get("comment"),
|
||||
"nullable": col.get("nullable", True),
|
||||
})
|
||||
col_obj = db.query(DataColumn).filter(
|
||||
DataColumn.table_id == table_obj.id, DataColumn.name == col["name"]
|
||||
).first()
|
||||
if not col_obj:
|
||||
sample = None
|
||||
try:
|
||||
@@ -150,7 +202,6 @@ def sync_metadata(db: Session, source_id: int, user_id: int) -> dict:
|
||||
sample = json.dumps(samples, ensure_ascii=False)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
col_obj = DataColumn(
|
||||
table_id=table_obj.id,
|
||||
name=col["name"],
|
||||
@@ -159,13 +210,58 @@ def sync_metadata(db: Session, source_id: int, user_id: int) -> dict:
|
||||
comment=col.get("comment"),
|
||||
is_nullable=col.get("nullable", True),
|
||||
sample_data=sample,
|
||||
checksum=col_checksum,
|
||||
last_scanned_at=scan_time,
|
||||
)
|
||||
db.add(col_obj)
|
||||
total_columns += 1
|
||||
_log_schema_change(db, source.id, "add_column", database_id=db_obj.id, table_id=table_obj.id, column_id=col_obj.id, new_value=col["name"])
|
||||
else:
|
||||
if col_obj.checksum != col_checksum:
|
||||
old_val = f"type={col_obj.data_type}, len={col_obj.length}, comment={col_obj.comment}"
|
||||
new_val = f"type={str(col.get('type', ''))}, len={col.get('max_length')}, comment={col.get('comment')}"
|
||||
_log_schema_change(db, source.id, "change_type", database_id=db_obj.id, table_id=table_obj.id, column_id=col_obj.id, old_value=old_val, new_value=new_val)
|
||||
col_obj.checksum = col_checksum
|
||||
col_obj.data_type = str(col.get("type", ""))
|
||||
col_obj.length = col.get("max_length")
|
||||
col_obj.comment = col.get("comment")
|
||||
col_obj.is_nullable = col.get("nullable", True)
|
||||
updated_columns += 1
|
||||
col_obj.last_scanned_at = scan_time
|
||||
col_obj.is_deleted = False
|
||||
col_obj.deleted_at = None
|
||||
|
||||
total_tables += 1
|
||||
|
||||
db.commit()
|
||||
# Soft-delete objects not seen in this scan and log changes
|
||||
deleted_dbs = db.query(Database).filter(
|
||||
Database.source_id == source.id,
|
||||
Database.last_scanned_at < scan_time,
|
||||
).all()
|
||||
for d in deleted_dbs:
|
||||
_log_schema_change(db, source.id, "drop_database", database_id=d.id, old_value=d.name)
|
||||
d.is_deleted = True
|
||||
d.deleted_at = scan_time
|
||||
|
||||
for db_obj in db.query(Database).filter(Database.source_id == source.id).all():
|
||||
deleted_tables = db.query(DataTable).filter(
|
||||
DataTable.database_id == db_obj.id,
|
||||
DataTable.last_scanned_at < scan_time,
|
||||
).all()
|
||||
for t in deleted_tables:
|
||||
_log_schema_change(db, source.id, "drop_table", database_id=db_obj.id, table_id=t.id, old_value=t.name)
|
||||
t.is_deleted = True
|
||||
t.deleted_at = scan_time
|
||||
|
||||
for table_obj in db.query(DataTable).filter(DataTable.database_id == db_obj.id).all():
|
||||
deleted_cols = db.query(DataColumn).filter(
|
||||
DataColumn.table_id == table_obj.id,
|
||||
DataColumn.last_scanned_at < scan_time,
|
||||
).all()
|
||||
for c in deleted_cols:
|
||||
_log_schema_change(db, source.id, "drop_column", database_id=db_obj.id, table_id=table_obj.id, column_id=c.id, old_value=c.name)
|
||||
c.is_deleted = True
|
||||
c.deleted_at = scan_time
|
||||
|
||||
source.status = "active"
|
||||
db.commit()
|
||||
@@ -176,6 +272,8 @@ def sync_metadata(db: Session, source_id: int, user_id: int) -> dict:
|
||||
"databases": len(db_names),
|
||||
"tables": total_tables,
|
||||
"columns": total_columns,
|
||||
"updated_tables": updated_tables,
|
||||
"updated_columns": updated_columns,
|
||||
}
|
||||
except Exception as e:
|
||||
source.status = "error"
|
||||
|
||||
Reference in New Issue
Block a user