"""API routes for uploading, listing and reprocessing unstructured files."""

import io
import logging
from typing import Optional

from fastapi import APIRouter, Depends, HTTPException, Query, UploadFile, File, status
from sqlalchemy.orm import Session

from app.core.database import get_db
from app.models.user import User
from app.schemas.common import ResponseModel, ListResponse
from app.services import unstructured_service
from app.api.deps import get_current_user
from app.core.events import minio_client
from app.core.config import settings
from app.models.metadata import UnstructuredFile

logger = logging.getLogger(__name__)

router = APIRouter()

# Lower-cased filename extension -> file_type label stored on the record.
_FILE_TYPES_BY_EXTENSION = {
    "docx": "word",
    "doc": "word",
    "xlsx": "excel",
    "xls": "excel",
    "pdf": "pdf",
    "txt": "txt",
}


def _detect_file_type(filename: str) -> str:
    """Return the file_type label for *filename*, or "unknown" if unmapped."""
    if "." not in filename:
        return "unknown"
    extension = filename.rsplit(".", 1)[-1].lower()
    return _FILE_TYPES_BY_EXTENSION.get(extension, "unknown")


def _object_name_for(filename: str) -> str:
    """Return the last path component of *filename* for use in an object key.

    The filename is supplied by the client, so directory parts (with either
    slash style) are dropped to keep the key under the uploader's prefix.
    Names without separators are returned unchanged.
    """
    base_name = filename.replace("\\", "/").rsplit("/", 1)[-1]
    if base_name in ("", ".", ".."):
        return "unknown"
    return base_name


@router.post("/upload")
def upload_file(
    file: UploadFile = File(...),
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Store an uploaded file, record it, and run processing on it.

    The file content is written to the configured bucket under
    ``unstructured/<user id>/<file name>``, an ``UnstructuredFile`` row with
    status ``"pending"`` is committed, and
    ``unstructured_service.process_unstructured_file`` is called for it.

    Returns:
        A ``ResponseModel``. If storing the object fails, only ``message`` is
        set and no row is created. Otherwise ``data`` holds the new row id
        and either the processing matches with status ``"processed"`` or
        status ``"error"`` with the error text.
    """
    filename = file.filename or "unknown"
    file_type = _detect_file_type(filename)

    # NOTE(review): the key depends only on user id and file name, so a second
    # upload with the same name by the same user targets the same object key.
    # Confirm that replacing the earlier object is intended.
    storage_path = f"unstructured/{current_user.id}/{_object_name_for(filename)}"
    try:
        data = file.file.read()
        # Assuming minio_client is a minio.Minio client: put_object requires
        # a stream with a callable read(), so raw bytes must be wrapped.
        minio_client.put_object(
            settings.MINIO_BUCKET_NAME,
            storage_path,
            data=io.BytesIO(data),
            length=len(data),
            content_type=file.content_type or "application/octet-stream",
        )
    except Exception as e:
        # Best-effort endpoint: report the failure in the response body, but
        # keep the traceback in the server log.
        logger.exception("Storing uploaded file at %r failed", storage_path)
        return ResponseModel(message=f"上传失败: {e}")

    db_obj = UnstructuredFile(
        original_name=filename,
        file_type=file_type,
        file_size=len(data),
        storage_path=storage_path,
        status="pending",
        created_by=current_user.id,
    )
    db.add(db_obj)
    db.commit()
    db.refresh(db_obj)

    # Trigger processing; a failure here is reported, the row is kept.
    try:
        result = unstructured_service.process_unstructured_file(db, db_obj.id)
    except Exception as e:
        logger.exception("Processing of unstructured file %s failed", db_obj.id)
        return ResponseModel(
            data={"id": db_obj.id, "status": "error", "error": str(e)}
        )
    return ResponseModel(
        data={
            "id": db_obj.id,
            "matches": result.get("matches", []),
            "status": "processed",
        }
    )


@router.get("/files")
def list_files(
    page: int = Query(1, ge=1),
    page_size: int = Query(20, ge=1, le=500),
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Return one page of the current user's files, newest first.

    Only rows whose ``created_by`` equals the current user's id are counted
    and returned; ordering is by ``created_at`` descending.

    Returns:
        A ``ListResponse`` with the page's rows as dicts in ``data``, the
        total number of matching rows in ``total``, and the requested
        ``page`` and ``page_size``.
    """
    query = db.query(UnstructuredFile).filter(
        UnstructuredFile.created_by == current_user.id
    )
    total = query.count()
    items = (
        query.order_by(UnstructuredFile.created_at.desc())
        .offset((page - 1) * page_size)
        .limit(page_size)
        .all()
    )
    return ListResponse(
        data=[
            {
                "id": f.id,
                "original_name": f.original_name,
                "file_type": f.file_type,
                "file_size": f.file_size,
                "status": f.status,
                "analysis_result": f.analysis_result,
                "created_at": f.created_at.isoformat() if f.created_at else None,
            }
            for f in items
        ],
        total=total,
        page=page,
        page_size=page_size,
    )


@router.post("/files/{file_id}/reprocess")
def reprocess_file(
    file_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Run processing again for one of the current user's files.

    Raises:
        HTTPException: 404 when no row has this id and was created by the
            current user. Errors raised by the processing call propagate.

    Returns:
        A ``ResponseModel`` whose ``data`` holds the processing matches and
        status ``"processed"``.
    """
    file_obj = (
        db.query(UnstructuredFile)
        .filter(
            UnstructuredFile.id == file_id,
            UnstructuredFile.created_by == current_user.id,
        )
        .first()
    )
    if not file_obj:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="文件不存在")

    result = unstructured_service.process_unstructured_file(db, file_id)
    return ResponseModel(
        data={"matches": result.get("matches", []), "status": "processed"}
    )