feat: Phase 3-5 - workflow, labeling, reports, dashboard enhancement, tests
This commit is contained in:
@@ -0,0 +1,96 @@
|
||||
from io import BytesIO
|
||||
from typing import Optional
|
||||
from sqlalchemy.orm import Session
|
||||
from datetime import datetime
|
||||
|
||||
from docx import Document
|
||||
from docx.shared import Inches, Pt, RGBColor
|
||||
from docx.enum.text import WD_ALIGN_PARAGRAPH
|
||||
|
||||
from app.models.project import ClassificationProject, ClassificationResult
|
||||
from app.models.classification import Category, DataLevel
|
||||
|
||||
|
||||
def _add_header_table(doc, headers):
    """Append a styled one-row table to *doc* whose cells hold *headers*."""
    table = doc.add_table(rows=1, cols=len(headers))
    table.style = 'Light Grid Accent 1'
    for cell, text in zip(table.rows[0].cells, headers):
        cell.text = text
    return table


def _append_row(table, values):
    """Append one row to *table* and write *values* into its cells in order."""
    for cell, text in zip(table.add_row().cells, values):
        cell.text = text


def generate_classification_report(
    db: Session,
    project_id: int,
    max_high_risk_rows: Optional[int] = 100,
) -> bytes:
    """Generate a Word (.docx) report for a classification project.

    The document contains four sections: basic project information, overall
    result counts (total / auto / manual), the distribution of results per
    data level, and a listing of results whose level code is ``L4`` or ``L5``.

    Args:
        db: SQLAlchemy session used to load the project and its results.
        project_id: Id of the ``ClassificationProject`` to report on.
        max_high_risk_rows: Maximum number of rows written to the
            high-sensitivity table. ``None`` disables the limit; negative
            values are treated as 0. Defaults to 100, the value that used to
            be hard-coded.

    Returns:
        The serialized .docx document as raw bytes.

    Raises:
        ValueError: If no project with ``project_id`` exists.
    """
    project = (
        db.query(ClassificationProject)
        .filter(ClassificationProject.id == project_id)
        .first()
    )
    if not project:
        raise ValueError("项目不存在")

    doc = Document()

    # Title
    title = doc.add_heading('数据分类分级项目报告', 0)
    title.alignment = WD_ALIGN_PARAGRAPH.CENTER

    # Basic info
    doc.add_heading('一、项目基本信息', level=1)
    info_data = [
        ('项目名称', project.name),
        ('报告生成时间', datetime.now().strftime('%Y-%m-%d %H:%M:%S')),
        ('项目状态', project.status),
        ('模板版本', project.template.version if project.template else 'N/A'),
    ]
    # Size the table from the data so adding an entry above cannot go out of
    # range (the row count used to be a separate hard-coded 4).
    info_table = doc.add_table(rows=len(info_data), cols=2)
    info_table.style = 'Light Grid Accent 1'
    for table_row, (label, value) in zip(info_table.rows, info_data):
        cells = table_row.cells
        cells[0].text = label
        cells[1].text = str(value)

    # Statistics
    doc.add_heading('二、分类分级统计', level=1)
    results = (
        db.query(ClassificationResult)
        .filter(ClassificationResult.project_id == project_id)
        .all()
    )

    total = len(results)
    auto_count = sum(1 for r in results if r.source == 'auto')
    manual_count = sum(1 for r in results if r.source == 'manual')

    # Results without a level are counted in `total` but not in any bucket.
    level_stats = {}
    for r in results:
        if r.level:
            level_stats[r.level.name] = level_stats.get(r.level.name, 0) + 1

    doc.add_paragraph(f'总字段数: {total}')
    doc.add_paragraph(f'自动识别: {auto_count}')
    doc.add_paragraph(f'人工打标: {manual_count}')

    # Level distribution, most frequent level first
    doc.add_heading('三、分级分布', level=1)
    level_table = _add_header_table(doc, ['分级', '数量', '占比'])
    for level_name, count in sorted(level_stats.items(), key=lambda item: -item[1]):
        # A non-empty level_stats implies total >= 1, so the division is safe.
        _append_row(
            level_table,
            [level_name, str(count), f'{count / total * 100:.1f}%'],
        )

    # High risk data
    doc.add_heading('四、高敏感数据清单(L4/L5)', level=1)
    high_risk = [r for r in results if r.level and r.level.code in ('L4', 'L5')]
    if high_risk:
        if max_high_risk_rows is None:
            shown = high_risk
        else:
            shown = high_risk[:max(max_high_risk_rows, 0)]

        risk_table = _add_header_table(
            doc, ['字段名', '所属表', '分类', '分级', '来源']
        )
        for r in shown:
            column = r.column
            table = column.table if column else None
            _append_row(
                risk_table,
                [
                    column.name if column else 'N/A',
                    table.name if table else 'N/A',
                    r.category.name if r.category else 'N/A',
                    r.level.name if r.level else 'N/A',
                    '自动' if r.source == 'auto' else '人工',
                ],
            )

        # Tell the reader when the listing is incomplete instead of
        # truncating silently.
        if len(shown) < len(high_risk):
            doc.add_paragraph(
                f'仅列出前 {len(shown)} 条,共 {len(high_risk)} 条高敏感数据。'
            )
    else:
        doc.add_paragraph('暂无L4/L5级高敏感数据。')

    # Save to bytes
    buffer = BytesIO()
    doc.save(buffer)
    return buffer.getvalue()
|
||||
@@ -0,0 +1,122 @@
|
||||
from typing import Optional, List, Tuple
|
||||
from sqlalchemy.orm import Session
|
||||
from fastapi import HTTPException, status
|
||||
|
||||
from app.models.project import ClassificationTask, ClassificationProject, ClassificationResult, TaskStatus, ResultStatus
|
||||
from app.models.metadata import DataColumn, DataTable, Database as MetaDatabase
|
||||
|
||||
|
||||
def get_task(db: Session, task_id: int) -> Optional[ClassificationTask]:
    """Return the task whose id equals ``task_id``, or ``None`` if absent."""
    by_id = ClassificationTask.id == task_id
    matching = db.query(ClassificationTask).filter(by_id)
    return matching.first()
|
||||
|
||||
|
||||
def list_tasks(
    db: Session,
    project_id: Optional[int] = None,
    assignee_id: Optional[int] = None,
    status: Optional[str] = None,
    page: int = 1,
    page_size: int = 20,
) -> Tuple[List[ClassificationTask], int]:
    """Return one page of classification tasks plus the total match count.

    Each filter is applied only when its argument is truthy, so ``None``,
    ``0`` and ``""`` all mean "do not filter on this field".

    Args:
        db: SQLAlchemy session to query with.
        project_id: Restrict to tasks of this project.
        assignee_id: Restrict to tasks assigned to this user.
        status: Restrict to tasks with this status value.
        page: 1-based page number; values below 1 are treated as 1.
        page_size: Maximum number of items per page; negative values are
            treated as 0.

    Returns:
        ``(items, total)`` where ``items`` is the requested page ordered by
        ``created_at`` descending and ``total`` is the number of tasks
        matching the filters across all pages.
    """
    # A non-positive page would produce a negative OFFSET (and a negative
    # page_size a negative LIMIT), which several databases reject outright.
    page = max(page, 1)
    page_size = max(page_size, 0)

    query = db.query(ClassificationTask)
    if project_id:
        query = query.filter(ClassificationTask.project_id == project_id)
    if assignee_id:
        query = query.filter(ClassificationTask.assignee_id == assignee_id)
    if status:
        query = query.filter(ClassificationTask.status == status)

    # Count before pagination so `total` reflects every matching row.
    total = query.count()
    items = (
        query.order_by(ClassificationTask.created_at.desc())
        .offset((page - 1) * page_size)
        .limit(page_size)
        .all()
    )
    return items, total
|
||||
|
||||
|
||||
def create_task(
    db: Session,
    project_id: int,
    name: str,
    assigner_id: int,
    assignee_id: int,
    target_type: str = "column",
    target_ids: Optional[str] = None,
    deadline: Optional[str] = None,
) -> ClassificationTask:
    """Create a classification task in the PENDING state and persist it.

    Args:
        db: SQLAlchemy session used to add and commit the task.
        project_id: Project the task belongs to.
        name: Task name.
        assigner_id: Id stored as the task's assigner.
        assignee_id: Id stored as the task's assignee.
        target_type: Stored as-is on the task; defaults to ``"column"``.
        target_ids: Stored as-is on the task.
        deadline: Optional string parsed with ``datetime.fromisoformat``;
            a falsy value leaves the deadline unset.

    Returns:
        The committed and refreshed ``ClassificationTask``.

    Raises:
        ValueError: If ``deadline`` is not accepted by
            ``datetime.fromisoformat``.
    """
    from datetime import datetime

    task_fields = {
        "project_id": project_id,
        "name": name,
        "assigner_id": assigner_id,
        "assignee_id": assignee_id,
        "target_type": target_type,
        "target_ids": target_ids,
        "status": TaskStatus.PENDING.value,
        "deadline": None,
    }
    if deadline:
        task_fields["deadline"] = datetime.fromisoformat(deadline)

    new_task = ClassificationTask(**task_fields)
    db.add(new_task)
    db.commit()
    # Reload so database-generated values are populated on the instance.
    db.refresh(new_task)
    return new_task
|
||||
|
||||
|
||||
def update_task_status(db: Session, task: ClassificationTask, status: str) -> ClassificationTask:
    """Set a task's status, stamping ``completed_at`` when it becomes completed.

    Args:
        db: SQLAlchemy session that owns ``task``.
        task: The task to update (mutated in place).
        status: New status value. When it equals
            ``TaskStatus.COMPLETED.value`` the task's ``completed_at`` is set
            to the current UTC time as a naive datetime.

    Returns:
        The same ``task`` instance after commit and refresh.

    Note:
        ``completed_at`` is left untouched for any other status, including
        when a previously completed task is moved to a different status.
    """
    task.status = status
    if status == TaskStatus.COMPLETED.value:
        from datetime import datetime, timezone

        # datetime.utcnow() is deprecated since Python 3.12. This yields the
        # same naive UTC timestamp, so the stored value is unchanged.
        task.completed_at = datetime.now(timezone.utc).replace(tzinfo=None)
    db.commit()
    db.refresh(task)
    return task
|
||||
|
||||
|
||||
def assign_columns_to_task(db: Session, project_id: int, task_id: int, column_ids: List[int]) -> None:
    """Ensure every given column has a classification result in the project.

    For each column id that has no ``ClassificationResult`` in ``project_id``
    yet, a placeholder result is created with AUTO status, source ``"auto"``
    and confidence ``0.0``. Existing results are left untouched.

    Args:
        db: SQLAlchemy session; committed once at the end.
        project_id: Project the results belong to.
        task_id: Currently unused; nothing written here references the task.
            NOTE(review): confirm whether results should be linked to it.
        column_ids: Column ids to cover. Duplicates are ignored.
    """
    # De-duplicate while keeping the caller's order, so a repeated id can
    # never produce two placeholder rows.
    wanted_ids = list(dict.fromkeys(column_ids))

    already_present = set()
    if wanted_ids:
        # One IN query instead of one SELECT per column id.
        rows = (
            db.query(ClassificationResult.column_id)
            .filter(
                ClassificationResult.project_id == project_id,
                ClassificationResult.column_id.in_(wanted_ids),
            )
            .all()
        )
        already_present = {row[0] for row in rows}

    for col_id in wanted_ids:
        if col_id in already_present:
            continue
        db.add(
            ClassificationResult(
                project_id=project_id,
                column_id=col_id,
                status=ResultStatus.AUTO.value,
                source="auto",
                confidence=0.0,
            )
        )

    # Commit unconditionally, as before, even when nothing was added.
    db.commit()
|
||||
|
||||
|
||||
def get_task_label_items(db: Session, project_id: int, keyword: Optional[str] = None) -> List[dict]:
    """Get all label items for a project (used in task labeling view).

    Loads every ``ClassificationResult`` of the project and flattens it,
    together with its column / table / database / source and its category
    and level, into a plain dict. Results whose column is missing are
    skipped.

    Args:
        db: SQLAlchemy session to query with.
        project_id: Project whose results are listed.
        keyword: Optional search term. When non-blank, only items whose
            column name, column comment or table name contains it
            (case-insensitive) are returned. This argument used to be
            accepted but ignored.
            NOTE(review): the set of fields searched is an assumption;
            confirm against what the labeling view expects.

    Returns:
        A list of dicts with the keys ``result_id``, ``column_id``,
        ``column_name``, ``data_type``, ``comment``, ``table_name``,
        ``database_name``, ``source_name``, ``category_id``,
        ``category_name``, ``level_id``, ``level_name``, ``level_color``,
        ``source``, ``confidence`` and ``status``.
    """
    needle = keyword.strip().lower() if keyword else ""

    results = (
        db.query(ClassificationResult)
        .filter(ClassificationResult.project_id == project_id)
        .all()
    )

    items = []
    for r in results:
        col = r.column
        if not col:
            continue
        table = col.table

        if needle:
            searchable = (col.name, col.comment, table.name if table else None)
            if not any(needle in text.lower() for text in searchable if text):
                continue

        database = table.database if table else None
        source = database.source if database else None

        items.append({
            "result_id": r.id,
            "column_id": col.id,
            "column_name": col.name,
            "data_type": col.data_type,
            "comment": col.comment,
            "table_name": table.name if table else None,
            "database_name": database.name if database else None,
            "source_name": source.name if source else None,
            "category_id": r.category_id,
            "category_name": r.category.name if r.category else None,
            "level_id": r.level_id,
            "level_name": r.level.name if r.level else None,
            "level_color": r.level.color if r.level else None,
            "source": r.source,
            "confidence": r.confidence,
            "status": r.status,
        })
    return items
|
||||
Reference in New Issue
Block a user