feat: Phase 3-5 - workflow, labeling, reports, dashboard enhancement, tests

This commit is contained in:
hiderfong
2026-04-22 17:22:11 +08:00
parent e71b13fe39
commit fb4aaad9fc
50 changed files with 741 additions and 323 deletions
+96
View File
@@ -0,0 +1,96 @@
from io import BytesIO
from typing import Optional
from sqlalchemy.orm import Session
from datetime import datetime
from docx import Document
from docx.shared import Inches, Pt, RGBColor
from docx.enum.text import WD_ALIGN_PARAGRAPH
from app.models.project import ClassificationProject, ClassificationResult
from app.models.classification import Category, DataLevel
def generate_classification_report(db: Session, project_id: int) -> bytes:
    """Generate a Word (.docx) report for a classification project.

    The document has four sections: basic project information, overall
    result counts, the distribution of results per data level, and a table
    of high-sensitivity fields (level code ``L4`` or ``L5``).

    Args:
        db: SQLAlchemy session used to load the project and its results.
        project_id: Primary key of the ``ClassificationProject`` to report on.

    Returns:
        The serialized .docx document as raw bytes.

    Raises:
        ValueError: If no project with ``project_id`` exists.
    """
    project = (
        db.query(ClassificationProject)
        .filter(ClassificationProject.id == project_id)
        .first()
    )
    if not project:
        raise ValueError("项目不存在")

    table_style = 'Light Grid Accent 1'
    # Upper bound on rows rendered in the high-sensitivity table.
    max_high_risk_rows = 100

    doc = Document()

    # Title
    title = doc.add_heading('数据分类分级项目报告', 0)
    title.alignment = WD_ALIGN_PARAGRAPH.CENTER

    # Basic info
    doc.add_heading('一、项目基本信息', level=1)
    info_data = [
        ('项目名称', project.name),
        ('报告生成时间', datetime.now().strftime('%Y-%m-%d %H:%M:%S')),
        ('项目状态', project.status),
        ('模板版本', project.template.version if project.template else 'N/A'),
    ]
    # Size the table from the data so adding an entry cannot overflow it.
    info_table = doc.add_table(rows=len(info_data), cols=2)
    info_table.style = table_style
    for table_row, (label, value) in zip(info_table.rows, info_data):
        table_row.cells[0].text = label
        table_row.cells[1].text = str(value)

    # Statistics
    doc.add_heading('二、分类分级统计', level=1)
    results = (
        db.query(ClassificationResult)
        .filter(ClassificationResult.project_id == project_id)
        .all()
    )
    total = len(results)
    auto_count = sum(1 for r in results if r.source == 'auto')
    manual_count = sum(1 for r in results if r.source == 'manual')

    # Results without a level are counted in the total but not per level.
    level_stats = {}
    for r in results:
        if r.level:
            level_stats[r.level.name] = level_stats.get(r.level.name, 0) + 1

    doc.add_paragraph(f'总字段数: {total}')
    doc.add_paragraph(f'自动识别: {auto_count}')
    doc.add_paragraph(f'人工打标: {manual_count}')

    # Level distribution, most frequent level first
    doc.add_heading('三、分级分布', level=1)
    level_table = doc.add_table(rows=1, cols=3)
    level_table.style = table_style
    hdr_cells = level_table.rows[0].cells
    hdr_cells[0].text = '分级'
    hdr_cells[1].text = '数量'
    hdr_cells[2].text = '占比'
    for level_name, count in sorted(level_stats.items(), key=lambda x: -x[1]):
        row_cells = level_table.add_row().cells
        row_cells[0].text = level_name
        row_cells[1].text = str(count)
        # total >= count >= 1 here, so the division is always safe.
        row_cells[2].text = f'{count / total * 100:.1f}%'

    # High risk data
    doc.add_heading('四、高敏感数据清单(L4/L5)', level=1)
    high_risk = [r for r in results if r.level and r.level.code in ('L4', 'L5')]
    if high_risk:
        risk_table = doc.add_table(rows=1, cols=5)
        risk_table.style = table_style
        hdr = risk_table.rows[0].cells
        hdr[0].text = '字段名'
        hdr[1].text = '所属表'
        hdr[2].text = '分类'
        hdr[3].text = '分级'
        hdr[4].text = '来源'
        for r in high_risk[:max_high_risk_rows]:
            column = r.column
            table = column.table if column else None
            cells = risk_table.add_row().cells
            cells[0].text = column.name if column else 'N/A'
            cells[1].text = table.name if table else 'N/A'
            cells[2].text = r.category.name if r.category else 'N/A'
            cells[3].text = r.level.name  # level is non-empty for every high_risk entry
            cells[4].text = '自动' if r.source == 'auto' else '人工'
        if len(high_risk) > max_high_risk_rows:
            # Tell the reader that the table is a truncated view.
            doc.add_paragraph(
                f'注:共 {len(high_risk)} 条高敏感数据,仅展示前 {max_high_risk_rows} 条。'
            )
    else:
        doc.add_paragraph('暂无L4/L5级高敏感数据。')

    # Serialize the document in memory and hand back the raw bytes.
    buffer = BytesIO()
    doc.save(buffer)
    return buffer.getvalue()
+122
View File
@@ -0,0 +1,122 @@
from typing import Optional, List, Tuple
from sqlalchemy.orm import Session
from fastapi import HTTPException, status
from app.models.project import ClassificationTask, ClassificationProject, ClassificationResult, TaskStatus, ResultStatus
from app.models.metadata import DataColumn, DataTable, Database as MetaDatabase
def get_task(db: Session, task_id: int) -> Optional[ClassificationTask]:
    """Look up a single classification task by its primary key.

    Returns the matching ``ClassificationTask``, or ``None`` when no row
    has the given ``task_id``.
    """
    matches_id = ClassificationTask.id == task_id
    task_query = db.query(ClassificationTask).filter(matches_id)
    return task_query.first()
def list_tasks(
    db: Session,
    project_id: Optional[int] = None,
    assignee_id: Optional[int] = None,
    status: Optional[str] = None,
    page: int = 1,
    page_size: int = 20,
) -> Tuple[List[ClassificationTask], int]:
    """Return one page of classification tasks plus the total match count.

    Args:
        db: SQLAlchemy session.
        project_id: Restrict to tasks of this project; falsy means no filter.
        assignee_id: Restrict to tasks assigned to this user; falsy means
            no filter.
        status: Restrict to tasks with this status value; falsy means no
            filter.
        page: 1-based page number. Values below 1 are treated as 1.
        page_size: Maximum number of tasks per page. Negative values are
            treated as 0.

    Returns:
        A ``(items, total)`` tuple: the tasks on the requested page, newest
        first, and the number of tasks matching the filters across all pages.
    """
    query = db.query(ClassificationTask)
    if project_id:
        query = query.filter(ClassificationTask.project_id == project_id)
    if assignee_id:
        query = query.filter(ClassificationTask.assignee_id == assignee_id)
    if status:
        query = query.filter(ClassificationTask.status == status)

    total = query.count()

    # Clamp paging inputs so a bad page/page_size never produces a negative
    # OFFSET or LIMIT.
    limit = max(page_size, 0)
    offset = max(page - 1, 0) * limit

    items = (
        query.order_by(
            ClassificationTask.created_at.desc(),
            # Tie-breaker keeps page boundaries stable when timestamps collide.
            ClassificationTask.id.desc(),
        )
        .offset(offset)
        .limit(limit)
        .all()
    )
    return items, total
def create_task(
    db: Session,
    project_id: int,
    name: str,
    assigner_id: int,
    assignee_id: int,
    target_type: str = "column",
    target_ids: Optional[str] = None,
    deadline: Optional[str] = None,
) -> ClassificationTask:
    """Create and persist a new classification task in the pending state.

    Args:
        db: SQLAlchemy session; the new row is committed and refreshed.
        project_id: Project the task belongs to.
        name: Task name.
        assigner_id: User who assigns the task.
        assignee_id: User the task is assigned to.
        target_type: Stored as-is on the task; defaults to ``"column"``.
        target_ids: Stored as-is on the task.
        deadline: ISO-8601 string parsed with ``datetime.fromisoformat``;
            an empty or missing value stores no deadline.

    Returns:
        The newly created ``ClassificationTask``.
    """
    from datetime import datetime

    due_at = None
    if deadline:
        due_at = datetime.fromisoformat(deadline)

    fields = {
        "project_id": project_id,
        "name": name,
        "assigner_id": assigner_id,
        "assignee_id": assignee_id,
        "target_type": target_type,
        "target_ids": target_ids,
        "status": TaskStatus.PENDING.value,
        "deadline": due_at,
    }
    new_task = ClassificationTask(**fields)

    db.add(new_task)
    db.commit()
    # Reload so database-generated values are populated on the instance.
    db.refresh(new_task)
    return new_task
def update_task_status(db: Session, task: ClassificationTask, status: str) -> ClassificationTask:
    """Set a task's status and commit the change.

    When the new status equals ``TaskStatus.COMPLETED.value`` the task's
    ``completed_at`` is also stamped with the current naive UTC time.

    Returns:
        The same ``task`` instance, refreshed from the database.
    """
    from datetime import datetime

    task.status = status
    just_completed = status == TaskStatus.COMPLETED.value
    if just_completed:
        task.completed_at = datetime.utcnow()

    db.commit()
    db.refresh(task)
    return task
def assign_columns_to_task(db: Session, project_id: int, task_id: int, column_ids: List[int]) -> None:
    """Ensure every given column has a classification result in the project.

    For each column id that has no ``ClassificationResult`` in the project
    yet, a placeholder result is created (status ``ResultStatus.AUTO``,
    source ``"auto"``, confidence ``0.0``). Existing results are left
    untouched. The session is committed at the end.

    Args:
        db: SQLAlchemy session.
        project_id: Project whose results are inspected/created.
        task_id: Currently unused.
            NOTE(review): nothing here links the results to the task;
            confirm whether that association is meant to be stored.
        column_ids: Column ids to cover; duplicates are ignored.
    """
    # De-duplicate while keeping caller order, so the same column is never
    # inserted twice regardless of the session's autoflush setting.
    wanted_ids = list(dict.fromkeys(column_ids))

    # Look up already-covered columns in batches instead of one SELECT per
    # column; batching keeps each IN (...) list at a bounded size.
    batch_size = 500
    existing_ids = set()
    for start in range(0, len(wanted_ids), batch_size):
        batch = wanted_ids[start:start + batch_size]
        rows = (
            db.query(ClassificationResult.column_id)
            .filter(
                ClassificationResult.project_id == project_id,
                ClassificationResult.column_id.in_(batch),
            )
            .all()
        )
        existing_ids.update(row[0] for row in rows)

    for col_id in wanted_ids:
        if col_id in existing_ids:
            continue
        db.add(
            ClassificationResult(
                project_id=project_id,
                column_id=col_id,
                status=ResultStatus.AUTO.value,
                source="auto",
                confidence=0.0,
            )
        )
    db.commit()
def get_task_label_items(db: Session, project_id: int, keyword: Optional[str] = None) -> List[dict]:
    """Get the label items for a project (used in the task labeling view).

    Each classification result of the project that still has a column is
    flattened into a dict carrying the column, its table/database/source
    names, and the current category/level assignment.

    Args:
        db: SQLAlchemy session.
        project_id: Project whose classification results are listed.
        keyword: Optional case-insensitive substring. When given (and not
            blank), only items whose column name, column comment, or table
            name contains it are returned. ``None`` or blank returns all.

    Returns:
        A list of dicts, one per matching result.
    """
    needle = keyword.strip().lower() if keyword else ""

    results = (
        db.query(ClassificationResult)
        .filter(ClassificationResult.project_id == project_id)
        .all()
    )

    items = []
    for r in results:
        col = r.column
        if not col:
            # Result has no column attached; nothing to label.
            continue
        table = col.table
        database = table.database if table else None
        source = database.source if database else None

        if needle:
            searchable = (col.name, col.comment, table.name if table else None)
            if not any(needle in str(text).lower() for text in searchable if text):
                continue

        level = r.level
        items.append({
            "result_id": r.id,
            "column_id": col.id,
            "column_name": col.name,
            "data_type": col.data_type,
            "comment": col.comment,
            "table_name": table.name if table else None,
            "database_name": database.name if database else None,
            "source_name": source.name if source else None,
            "category_id": r.category_id,
            "category_name": r.category.name if r.category else None,
            "level_id": r.level_id,
            "level_name": level.name if level else None,
            "level_color": level.color if level else None,
            "source": r.source,
            "confidence": r.confidence,
            "status": r.status,
        })
    return items