feat: 全量功能模块开发与集成测试修复
- 新增后端模块:Alert、APIAsset、Compliance、Lineage、Masking、Risk、SchemaChange、Unstructured、Watermark
- 新增前端模块页面与API接口
- 新增Alembic迁移脚本(002-014)覆盖全量业务表
- 新增测试数据生成脚本与集成测试脚本
- 修复metadata模型JSON类型导入缺失导致启动失败的问题
- 修复前端Alert/APIAsset页面request模块路径错误
- 更新docker-compose与开发计划文档
This commit is contained in:
@@ -0,0 +1,59 @@
|
||||
"""
|
||||
Generate synthetic manual-labeled data for ML model training/demo.
|
||||
Run this script after metadata has been scanned so there are columns to label.
|
||||
"""
|
||||
import random
|
||||
import sys
|
||||
import os
|
||||
|
||||
# Prepend the parent of this script's directory to the module search path;
# presumably this is what lets the `app.*` imports below resolve when the
# script is run directly -- confirm against the project layout.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
||||
|
||||
from app.core.database import SessionLocal
|
||||
from app.models.metadata import DataColumn
|
||||
from app.models.classification import Category
|
||||
from app.models.project import ClassificationResult
|
||||
|
||||
|
||||
def main():
    """Regenerate synthetic "manual" classification labels for scanned columns.

    Loads up to 300 ``DataColumn`` rows and every ``Category`` row whose
    ``level`` is 2, removes the existing ``ClassificationResult`` rows whose
    ``source`` is ``"manual"``, and inserts one new manual result per loaded
    column. The category of a column is drawn from a ``random.Random`` seeded
    with the column name, so columns sharing a name receive the same category.

    The delete and the inserts are committed together: if anything fails, the
    transaction is rolled back and the previous manual labels are kept.

    Prints a message and returns without modifying anything when no columns
    or no level-2 categories are found.
    """
    db = SessionLocal()
    try:
        # LIMIT without ORDER BY may yield a different subset on each run;
        # order by id so the same columns are labeled every time.
        columns = db.query(DataColumn).order_by(DataColumn.id).limit(300).all()
        if not columns:
            print("No columns found in database. Please scan a data source first.")
            return

        # rng.choice() depends on the order of this list, so it must be
        # stable for the per-name seeding below to be reproducible.
        categories = (
            db.query(Category)
            .filter(Category.level == 2)
            .order_by(Category.id)
            .all()
        )
        if not categories:
            print("No sub-categories found.")
            return

        # Clear old manual labels to avoid duplicates. Deliberately NOT
        # committed here: the delete shares a transaction with the inserts so
        # a failure below cannot leave the table without manual labels.
        db.query(ClassificationResult).filter(
            ClassificationResult.source == "manual"
        ).delete()

        results = []
        for col in columns:
            # Seeding with the column name makes the pick deterministic per
            # name (string seeds do not depend on per-process hash salting).
            rng = random.Random(col.name)
            cat = rng.choice(categories)
            results.append(
                ClassificationResult(
                    # Synthetic labels are not attached to any project.
                    # NOTE(review): requires project_id to be nullable -- confirm.
                    project_id=None,
                    column_id=col.id,
                    category_id=cat.id,
                    # NOTE(review): this stores the parent category's `level`
                    # attribute as level_id (3 when there is no parent);
                    # confirm that this is the intended meaning of level_id.
                    level_id=cat.parent.level if cat.parent else 3,
                    source="manual",
                    confidence=1.0,
                    status="manual",
                )
            )

        db.add_all(results)
        db.commit()
        count = len(results)
        print(f"Generated {count} manual labels across {len(categories)} categories.")
    except Exception:
        # Undo the pending delete/inserts so the old labels survive a failure.
        db.rollback()
        raise
    finally:
        db.close()
|
||||
|
||||
|
||||
# Script entry point: generate the labels only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user