"""API asset management and Swagger/OpenAPI sensitive-field scanning."""

import json
import logging
from datetime import datetime, timezone
from typing import Optional

import requests
from sqlalchemy.orm import Session

from app.models.api_asset import APIAsset, APIEndpoint
from app.models.metadata import DataColumn
from app.services.classification_engine import match_rule

logger = logging.getLogger(__name__)

# Simple sensitive keywords for API field detection.
# Order matters: the first keyword found in a field name becomes its "reason".
SENSITIVE_KEYWORDS = [
    "password", "pwd", "passwd", "secret", "token",
    "credit_card", "card_no", "bank_account", "bank_card",
    "id_card", "id_number", "phone", "mobile", "email",
    "address", "name", "age", "gender", "salary", "income",
    "health", "medical", "biometric", "fingerprint", "face",
]

# Keywords that raise an endpoint's risk above "medium".
_CRITICAL_KEYWORDS = ("password", "secret", "token", "biometric")
_HIGH_KEYWORDS = ("credit_card", "bank_account", "fingerprint", "face")

# Keys of a path item that denote operations (everything else is skipped).
_HTTP_METHODS = frozenset(
    {"get", "post", "put", "patch", "delete", "head", "options"}
)


def _is_sensitive_field(name: str, schema: dict) -> tuple[bool, str]:
    """Decide whether a schema property looks sensitive.

    Args:
        name: Property name as it appears in the schema.
        schema: The property's schema; non-dict values are treated as empty.

    Returns:
        ``(True, reason)`` when sensitive, where ``reason`` is
        ``"keyword:<kw>"``, ``"format:email"`` or ``"format:user-uuid"``;
        otherwise ``(False, "")``.
    """
    low = name.lower()
    # NOTE: this is a substring match, so e.g. "page" matches "age".
    for kw in SENSITIVE_KEYWORDS:
        if kw in low:
            return True, f"keyword:{kw}"

    if not isinstance(schema, dict):
        return False, ""

    # Check description / format hints
    desc = str(schema.get("description", "")).lower()
    fmt = str(schema.get("format", "")).lower()
    if "email" in fmt or "email" in desc:
        return True, "format:email"
    if "uuid" in fmt and "user" in low:
        return True, "format:user-uuid"
    return False, ""


def _extract_fields(schema: dict, prefix: str = "") -> list[dict]:
    """Recursively collect sensitive properties from a JSON schema.

    Nested objects are joined with ``.`` and array levels are marked with
    ``[]`` (for example ``users[].email``). ``$ref`` entries are not resolved.

    Args:
        schema: Schema to inspect; anything that is not a dict yields ``[]``.
        prefix: Dotted path of the enclosing property, empty at the root.

    Returns:
        A list of ``{"name", "type", "reason"}`` dicts, one per sensitive
        property, in discovery order.
    """
    fields: list[dict] = []
    if not isinstance(schema, dict):
        return fields

    # A schema that is itself an array (typical for list endpoints) carries
    # its properties under "items", so descend into it.
    items = schema.get("items")
    if schema.get("type") == "array" and isinstance(items, dict):
        fields.extend(_extract_fields(items, f"{prefix}[]"))

    props = schema.get("properties")
    if not isinstance(props, dict):
        return fields

    for key, sub in props.items():
        full_name = f"{prefix}.{key}" if prefix else key
        sub_schema = sub if isinstance(sub, dict) else {}

        sensitive, reason = _is_sensitive_field(key, sub_schema)
        if sensitive:
            fields.append(
                {
                    "name": full_name,
                    "type": sub_schema.get("type", "unknown"),
                    "reason": reason,
                }
            )

        sub_type = sub_schema.get("type")
        # nested object
        if sub_type == "object" and "properties" in sub_schema:
            fields.extend(_extract_fields(sub_schema, full_name))
        # array items
        if sub_type == "array" and isinstance(sub_schema.get("items"), dict):
            fields.extend(
                _extract_fields(sub_schema["items"], full_name + "[]")
            )
    return fields


def _risk_level_from_fields(fields: list[dict]) -> str:
    """Map a list of sensitive fields to a risk level.

    The most severe match across *all* fields wins, so the result does not
    depend on field order.

    Returns:
        ``"low"`` for no fields, ``"critical"`` or ``"high"`` when any field
        name contains a keyword of that severity, otherwise ``"medium"``.
    """
    if not fields:
        return "low"

    names = [f["name"].lower() for f in fields]
    if any(kw in name for name in names for kw in _CRITICAL_KEYWORDS):
        return "critical"
    if any(kw in name for name in names for kw in _HIGH_KEYWORDS):
        return "high"
    return "medium"


def _dig(node, *keys):
    """Walk nested dicts by key, returning None as soon as a level is missing."""
    for key in keys:
        if not isinstance(node, dict):
            return None
        node = node.get(key)
    return node


def _collect_parameters(detail: dict) -> list[dict]:
    """Summarize an operation's parameters as name/in/required/type dicts."""
    collected = []
    for param in detail.get("parameters") or []:
        if not isinstance(param, dict):
            continue
        param_schema = param.get("schema")
        param_type = (
            param_schema.get("type", "string")
            if isinstance(param_schema, dict)
            else "string"
        )
        collected.append(
            {
                "name": param.get("name"),
                "in": param.get("in"),
                "required": param.get("required", False),
                "type": param_type,
            }
        )
    return collected


def _first_ok_response_schema(detail: dict):
    """Return the first schema declared under the operation's 200 response."""
    content = _dig(detail, "responses", "200", "content")
    if not isinstance(content, dict):
        return None
    for media in content.values():
        if isinstance(media, dict) and "schema" in media:
            return media["schema"]
    return None


def _dedupe_by_name(fields: list[dict]) -> list[dict]:
    """Drop later fields that repeat an earlier name, preserving order."""
    unique: dict = {}
    for field in fields:
        unique.setdefault(field["name"], field)
    return list(unique.values())


def scan_swagger(db: Session, asset_id: int) -> dict:
    """Fetch an asset's Swagger document and rebuild its endpoint records.

    The previous endpoints of the asset are deleted and replaced by one
    ``APIEndpoint`` per operation found under ``paths``. Each endpoint stores
    its parameters, JSON request schema, 200-response schema, detected
    sensitive fields and a derived risk level. The asset's ``scan_status``
    is set to ``"scanning"`` first, then ``"completed"`` or ``"failed"``.

    Args:
        db: Active SQLAlchemy session; this function commits on it.
        asset_id: Primary key of the ``APIAsset`` to scan.

    Returns:
        ``{"success": True, "total": int, "sensitive": int}`` on success, or
        ``{"success": False, "error": str}`` when the asset is missing, has
        no ``swagger_url``, or the scan fails. Exceptions are not propagated.
    """
    asset = db.query(APIAsset).filter(APIAsset.id == asset_id).first()
    if not asset:
        return {"success": False, "error": "Asset not found"}
    if not asset.swagger_url:
        return {"success": False, "error": "No swagger_url configured"}

    asset.scan_status = "scanning"
    db.commit()

    try:
        headers = dict(asset.headers or {})
        resp = requests.get(asset.swagger_url, headers=headers, timeout=30)
        resp.raise_for_status()
        spec = resp.json()
        if not isinstance(spec, dict):
            raise ValueError("Swagger document is not a JSON object")

        # Clear previous endpoints
        db.query(APIEndpoint).filter(APIEndpoint.asset_id == asset_id).delete()

        total = 0
        sensitive_total = 0
        for path, methods in (spec.get("paths") or {}).items():
            if not isinstance(methods, dict):
                continue
            for method, detail in methods.items():
                # Path items also hold non-operation keys such as
                # "parameters" or "summary"; only HTTP verbs are operations.
                if method.lower() not in _HTTP_METHODS:
                    continue
                if not isinstance(detail, dict):
                    continue
                total += 1

                req_schema = _dig(
                    detail, "requestBody", "content", "application/json", "schema"
                )
                resp_schema = _first_ok_response_schema(detail)

                fields = []
                if req_schema:
                    fields.extend(_extract_fields(req_schema))
                if resp_schema:
                    fields.extend(_extract_fields(resp_schema))
                unique_fields = _dedupe_by_name(fields)

                db.add(
                    APIEndpoint(
                        asset_id=asset_id,
                        method=method.upper(),
                        path=path,
                        summary=detail.get("summary", ""),
                        tags=detail.get("tags", []),
                        parameters=_collect_parameters(detail),
                        request_body_schema=req_schema,
                        response_schema=resp_schema,
                        sensitive_fields=unique_fields,
                        risk_level=_risk_level_from_fields(unique_fields),
                    )
                )
                if unique_fields:
                    sensitive_total += 1

        asset.scan_status = "completed"
        asset.total_endpoints = total
        asset.sensitive_endpoints = sensitive_total
        # Naive UTC timestamp, same value datetime.utcnow() would produce.
        asset.updated_at = datetime.now(timezone.utc).replace(tzinfo=None)
        db.commit()
        return {"success": True, "total": total, "sensitive": sensitive_total}
    except Exception as e:
        logger.exception("Swagger scan failed for asset %s", asset_id)
        # Discard the pending delete and any partially added endpoints so a
        # failed scan leaves the previous endpoint set untouched; this also
        # resets the session if the failure came from the database.
        db.rollback()
        asset.scan_status = "failed"
        db.commit()
        return {"success": False, "error": str(e)}


def create_asset(db: Session, data: dict, user_id: Optional[int] = None) -> APIAsset:
    """Create and persist an ``APIAsset`` from ``data``.

    Args:
        db: Active SQLAlchemy session; committed by this function.
        data: Must contain ``name`` and ``base_url``; ``swagger_url``,
            ``auth_type`` (default ``"none"``), ``headers`` and
            ``description`` are optional.
        user_id: Stored as ``created_by``.

    Returns:
        The refreshed, persisted asset.

    Raises:
        KeyError: If ``name`` or ``base_url`` is missing from ``data``.
    """
    asset = APIAsset(
        name=data["name"],
        base_url=data["base_url"],
        swagger_url=data.get("swagger_url"),
        auth_type=data.get("auth_type", "none"),
        headers=data.get("headers"),
        description=data.get("description"),
        created_by=user_id,
    )
    db.add(asset)
    db.commit()
    db.refresh(asset)
    return asset


def update_asset(db: Session, asset_id: int, data: dict) -> Optional[APIAsset]:
    """Apply ``data`` to an existing asset and persist it.

    Keys that are not attributes of the asset are ignored. The primary key
    ``id`` and underscore-prefixed names are also skipped so a caller cannot
    overwrite the identity or SQLAlchemy's internal state.

    Returns:
        The refreshed asset, or ``None`` when no asset has ``asset_id``.
    """
    asset = db.query(APIAsset).filter(APIAsset.id == asset_id).first()
    if not asset:
        return None

    for key, value in data.items():
        if key == "id" or key.startswith("_"):
            continue
        if hasattr(asset, key):
            setattr(asset, key, value)

    db.commit()
    db.refresh(asset)
    return asset


def delete_asset(db: Session, asset_id: int) -> bool:
    """Delete the asset with ``asset_id``.

    Returns:
        ``True`` when an asset was deleted, ``False`` when none was found.
    """
    asset = db.query(APIAsset).filter(APIAsset.id == asset_id).first()
    if not asset:
        return False
    db.delete(asset)
    db.commit()
    return True