feat: Phase 3-5 - workflow, labeling, reports, dashboard enhancement, tests

2026-04-22 17:22:11 +08:00
parent e71b13fe39
commit fb4aaad9fc
50 changed files with 741 additions and 323 deletions
@@ -0,0 +1,96 @@
+from io import BytesIO
+from typing import Optional
+from sqlalchemy.orm import Session
+from datetime import datetime
+
+from docx import Document
+from docx.shared import Inches, Pt, RGBColor
+from docx.enum.text import WD_ALIGN_PARAGRAPH
+
+from app.models.project import ClassificationProject, ClassificationResult
+from app.models.classification import Category, DataLevel
+
+
+def generate_classification_report(db: Session, project_id: int) -> bytes:
+    """Generate a Word (.docx) report for a classification project.
+
+    The document has four sections: basic project information, overall
+    statistics, the per-level distribution, and a list of highly sensitive
+    (L4/L5) fields.
+
+    Args:
+        db: Session used to load the project and its classification results.
+        project_id: ID of the project to report on.
+
+    Returns:
+        The serialized document as bytes.
+
+    Raises:
+        ValueError: If no project with ``project_id`` exists.
+    """
+    # Upper bound on the number of rows rendered in the high-sensitivity table.
+    max_risk_rows = 100
+
+    project = db.query(ClassificationProject).filter(ClassificationProject.id == project_id).first()
+    if not project:
+        raise ValueError("项目不存在")
+
+    doc = Document()
+
+    # Title
+    title = doc.add_heading('数据分类分级项目报告', 0)
+    title.alignment = WD_ALIGN_PARAGRAPH.CENTER
+
+    # Basic info
+    doc.add_heading('一、项目基本信息', level=1)
+    info_data = [
+        ('项目名称', project.name),
+        ('报告生成时间', datetime.now().strftime('%Y-%m-%d %H:%M:%S')),
+        ('项目状态', project.status),
+        ('模板版本', project.template.version if project.template else 'N/A'),
+    ]
+    # Size the table from the data so the row count cannot drift out of sync
+    # with the list above.
+    info_table = doc.add_table(rows=len(info_data), cols=2)
+    info_table.style = 'Light Grid Accent 1'
+    for table_row, (label, value) in zip(info_table.rows, info_data):
+        table_row.cells[0].text = label
+        table_row.cells[1].text = str(value)
+
+    # Statistics: gather every counter in a single pass over the results.
+    doc.add_heading('二、分类分级统计', level=1)
+    results = db.query(ClassificationResult).filter(ClassificationResult.project_id == project_id).all()
+
+    total = len(results)
+    auto_count = 0
+    manual_count = 0
+    level_stats = {}
+    high_risk = []
+    for r in results:
+        if r.source == 'auto':
+            auto_count += 1
+        elif r.source == 'manual':
+            manual_count += 1
+        if r.level:
+            level_stats[r.level.name] = level_stats.get(r.level.name, 0) + 1
+            if r.level.code in ('L4', 'L5'):
+                high_risk.append(r)
+
+    doc.add_paragraph(f'总字段数: {total}')
+    doc.add_paragraph(f'自动识别: {auto_count}')
+    doc.add_paragraph(f'人工打标: {manual_count}')
+
+    doc.add_heading('三、分级分布', level=1)
+    level_table = doc.add_table(rows=1, cols=3)
+    level_table.style = 'Light Grid Accent 1'
+    hdr_cells = level_table.rows[0].cells
+    hdr_cells[0].text = '分级'
+    hdr_cells[1].text = '数量'
+    hdr_cells[2].text = '占比'
+    # Most frequent level first. level_stats is only non-empty when results
+    # exist, so total is never zero inside this loop.
+    for level_name, count in sorted(level_stats.items(), key=lambda x: -x[1]):
+        row_cells = level_table.add_row().cells
+        row_cells[0].text = level_name
+        row_cells[1].text = str(count)
+        row_cells[2].text = f'{count / total * 100:.1f}%'
+
+    # High risk data
+    doc.add_heading('四、高敏感数据清单（L4/L5）', level=1)
+    if high_risk:
+        risk_table = doc.add_table(rows=1, cols=5)
+        risk_table.style = 'Light Grid Accent 1'
+        hdr = risk_table.rows[0].cells
+        hdr[0].text = '字段名'
+        hdr[1].text = '所属表'
+        hdr[2].text = '分类'
+        hdr[3].text = '分级'
+        hdr[4].text = '来源'
+        for r in high_risk[:max_risk_rows]:
+            row = risk_table.add_row().cells
+            row[0].text = r.column.name if r.column else 'N/A'
+            row[1].text = r.column.table.name if r.column and r.column.table else 'N/A'
+            row[2].text = r.category.name if r.category else 'N/A'
+            row[3].text = r.level.name
+            row[4].text = '自动' if r.source == 'auto' else '人工'
+        if len(high_risk) > max_risk_rows:
+            # Tell the reader the list is truncated instead of dropping rows silently.
+            doc.add_paragraph(f'共 {len(high_risk)} 条高敏感数据，仅展示前 {max_risk_rows} 条。')
+    else:
+        doc.add_paragraph('暂无L4/L5级高敏感数据。')
+
+    # Save to bytes
+    buffer = BytesIO()
+    doc.save(buffer)
+    return buffer.getvalue()
@@ -0,0 +1,122 @@
+from typing import Optional, List, Tuple
+from sqlalchemy.orm import Session
+from fastapi import HTTPException, status
+
+from app.models.project import ClassificationTask, ClassificationProject, ClassificationResult, TaskStatus, ResultStatus
+from app.models.metadata import DataColumn, DataTable, Database as MetaDatabase
+
+
+def get_task(db: Session, task_id: int) -> Optional[ClassificationTask]:
+    """Return the task whose ``id`` equals ``task_id``, or ``None`` if there is none."""
+    matching = db.query(ClassificationTask).filter(ClassificationTask.id == task_id)
+    return matching.first()
+
+
+def list_tasks(
+    db: Session,
+    project_id: Optional[int] = None,
+    assignee_id: Optional[int] = None,
+    status: Optional[str] = None,
+    page: int = 1,
+    page_size: int = 20,
+) -> Tuple[List[ClassificationTask], int]:
+    """Return one page of tasks together with the total number of matches.
+
+    Args:
+        db: Session used to run the query.
+        project_id: When truthy, only tasks of this project are returned.
+        assignee_id: When truthy, only tasks assigned to this user are returned.
+        status: When truthy, only tasks with exactly this status are returned.
+        page: 1-based page number; values below 1 are treated as 1.
+        page_size: Maximum number of tasks per page; negative values are
+            treated as 0.
+
+    Returns:
+        A ``(items, total)`` tuple where ``total`` counts all tasks matching
+        the filters, ignoring pagination.
+    """
+    query = db.query(ClassificationTask)
+    if project_id:
+        query = query.filter(ClassificationTask.project_id == project_id)
+    if assignee_id:
+        query = query.filter(ClassificationTask.assignee_id == assignee_id)
+    if status:
+        query = query.filter(ClassificationTask.status == status)
+    total = query.count()
+
+    # Clamp the paging inputs so a bad page or size can never turn into a
+    # negative OFFSET/LIMIT, which several databases reject.
+    limit = max(page_size, 0)
+    offset = (max(page, 1) - 1) * limit
+
+    # created_at alone is not unique; the id tie-breaker keeps the ordering
+    # deterministic so rows do not repeat or vanish between pages.
+    ordered = query.order_by(
+        ClassificationTask.created_at.desc(),
+        ClassificationTask.id.desc(),
+    )
+    items = ordered.offset(offset).limit(limit).all()
+    return items, total
+
+
+def create_task(
+    db: Session,
+    project_id: int,
+    name: str,
+    assigner_id: int,
+    assignee_id: int,
+    target_type: str = "column",
+    target_ids: Optional[str] = None,
+    deadline: Optional[str] = None,
+) -> ClassificationTask:
+    """Create a task in the pending state, commit it and return it.
+
+    Args:
+        db: Session used to persist the task.
+        project_id: ID of the project the task belongs to.
+        name: Task name.
+        assigner_id: ID of the user who assigns the task.
+        assignee_id: ID of the user the task is assigned to.
+        target_type: Kind of target the task refers to.
+        target_ids: Stored on the task unchanged.
+        deadline: Optional ISO 8601 date/time string; a trailing ``Z`` is
+            accepted as UTC. An empty value means no deadline.
+
+    Returns:
+        The refreshed ``ClassificationTask`` instance.
+
+    Raises:
+        ValueError: If ``deadline`` is not a valid ISO 8601 string.
+    """
+    from datetime import datetime
+
+    parsed_deadline = None
+    if deadline:
+        # datetime.fromisoformat() only understands the "Z" suffix from
+        # Python 3.11 on; rewrite it to the equivalent explicit UTC offset
+        # so such timestamps parse on older interpreters as well.
+        iso_text = deadline
+        if iso_text.endswith("Z"):
+            iso_text = iso_text[:-1] + "+00:00"
+        parsed_deadline = datetime.fromisoformat(iso_text)
+
+    db_obj = ClassificationTask(
+        project_id=project_id,
+        name=name,
+        assigner_id=assigner_id,
+        assignee_id=assignee_id,
+        target_type=target_type,
+        target_ids=target_ids,
+        status=TaskStatus.PENDING.value,
+        deadline=parsed_deadline,
+    )
+    db.add(db_obj)
+    db.commit()
+    db.refresh(db_obj)
+    return db_obj
+
+
+def update_task_status(db: Session, task: ClassificationTask, status: str) -> ClassificationTask:
+    """Set the status of ``task``, commit the change and return the refreshed task.
+
+    When the new status is the completed status, ``completed_at`` is set to
+    the current UTC time as a naive datetime. ``status`` is stored as given;
+    it is not validated here.
+
+    Args:
+        db: Session used to persist the change.
+        task: Task to update.
+        status: New status value.
+
+    Returns:
+        The same ``task`` instance after commit and refresh.
+    """
+    task.status = status
+    if status == TaskStatus.COMPLETED.value:
+        from datetime import datetime, timezone
+        # datetime.utcnow() is deprecated since Python 3.12. Dropping tzinfo
+        # from an aware UTC "now" yields the same naive UTC timestamp.
+        task.completed_at = datetime.now(timezone.utc).replace(tzinfo=None)
+    db.commit()
+    db.refresh(task)
+    return task
+
+
+def assign_columns_to_task(db: Session, project_id: int, task_id: int, column_ids: List[int]) -> None:
+    """Ensure every given column has a classification result in the project.
+
+    For each column ID without a result in ``project_id``, a placeholder
+    result (auto status, source ``"auto"``, confidence 0.0) is created.
+    Existing results are left untouched. The session is committed at the end.
+
+    NOTE(review): ``task_id`` is accepted but not used; nothing written here
+    links the results to the task. Confirm whether that link is expected.
+
+    Args:
+        db: Session used to query and persist results.
+        project_id: ID of the project the results belong to.
+        task_id: Currently unused.
+        column_ids: IDs of the columns to cover; duplicates are ignored.
+    """
+    # Drop duplicates (keeping order) so one column never gets two new rows.
+    unique_ids = list(dict.fromkeys(column_ids))
+
+    # Look up existing results in batches rather than one query per column.
+    # The batch size keeps each IN list below common bind-parameter limits.
+    batch_size = 500
+    existing_ids = set()
+    for start in range(0, len(unique_ids), batch_size):
+        batch = unique_ids[start:start + batch_size]
+        rows = db.query(ClassificationResult.column_id).filter(
+            ClassificationResult.project_id == project_id,
+            ClassificationResult.column_id.in_(batch),
+        ).all()
+        existing_ids.update(row[0] for row in rows)
+
+    db.add_all(
+        ClassificationResult(
+            project_id=project_id,
+            column_id=col_id,
+            status=ResultStatus.AUTO.value,
+            source="auto",
+            confidence=0.0,
+        )
+        for col_id in unique_ids
+        if col_id not in existing_ids
+    )
+    db.commit()
+
+
+def get_task_label_items(db: Session, project_id: int, keyword: Optional[str] = None) -> List[dict]:
+    """Get the label items of a project (used in the task labeling view).
+
+    Each item flattens one classification result together with its column,
+    table, database, data source, category and level. Results whose column
+    is missing are skipped.
+
+    Args:
+        db: Session used to load the results.
+        project_id: ID of the project whose results are listed.
+        keyword: Optional search text. When non-blank, only items whose
+            column name or column comment contains it (case-insensitive)
+            are returned.
+
+    Returns:
+        A list of dicts, one per matching result.
+    """
+    query = db.query(ClassificationResult).filter(ClassificationResult.project_id == project_id)
+    results = query.all()
+
+    # Normalize once; an empty needle disables filtering.
+    needle = keyword.strip().lower() if keyword else ""
+
+    items = []
+    for r in results:
+        col = r.column
+        if not col:
+            continue
+        if needle:
+            searchable = (col.name or "", col.comment or "")
+            if not any(needle in text.lower() for text in searchable):
+                continue
+        table = col.table
+        database = table.database if table else None
+        source = database.source if database else None
+
+        items.append({
+            "result_id": r.id,
+            "column_id": col.id,
+            "column_name": col.name,
+            "data_type": col.data_type,
+            "comment": col.comment,
+            "table_name": table.name if table else None,
+            "database_name": database.name if database else None,
+            "source_name": source.name if source else None,
+            "category_id": r.category_id,
+            "category_name": r.category.name if r.category else None,
+            "level_id": r.level_id,
+            "level_name": r.level.name if r.level else None,
+            "level_color": r.level.color if r.level else None,
+            "source": r.source,
+            "confidence": r.confidence,
+            "status": r.status,
+        })
+    return items