# Data Anomaly Detector for Construction
## Overview
Detect unusual patterns, outliers, and anomalies in construction data. Identify cost overruns, schedule delays, productivity issues, and data quality problems before they impact projects.
## Business Case
Construction data often contains anomalies that indicate:
- Cost estimate errors or fraud
- Schedule logic issues
- Productivity problems
- Data entry mistakes
- Equipment or material issues
Early detection prevents costly corrections and project delays.
## Technical Implementation
```python
from dataclasses import dataclass, field
from typing import List, Dict, Any, Optional, Tuple
from enum import Enum
import pandas as pd
import numpy as np
from datetime import datetime
from scipy import stats
class AnomalyType(Enum):
    """Category of a detected anomaly.

    Used as the type of ``Anomaly.anomaly_type``. Every member's value is
    the lowercase form of its name, so a member can be looked up from its
    string form with ``AnomalyType("outlier")``.
    """

    OUTLIER = "outlier"
    PATTERN_BREAK = "pattern_break"
    MISSING_SEQUENCE = "missing_sequence"
    DUPLICATE = "duplicate"
    IMPOSSIBLE_VALUE = "impossible_value"
    TREND_DEVIATION = "trend_deviation"
class AnomalySeverity(Enum):
    """Severity level attached to a detected anomaly.

    Used as the type of ``Anomaly.severity``. Members are declared from
    most to least severe; the values are plain strings, so members are
    not orderable with ``<`` / ``>``.
    """

    CRITICAL = "critical"
    HIGH = "high"
    MEDIUM = "medium"
    LOW = "low"
@dataclass
class Anomaly:
    """One anomaly found in a dataset.

    The first five fields are required; the remaining fields are optional
    and default to "not provided" values (``None``, ``""`` or ``0.0``).
    """

    # --- required ---
    id: str  # identifier of this anomaly record
    anomaly_type: AnomalyType  # category of the anomaly
    severity: AnomalySeverity  # severity level assigned to the anomaly
    field: str  # presumably the column/field name the value came from -- confirm
    value: Any  # the offending value; any type is accepted

    # --- optional ---
    # Pair of floats; presumably (lower, upper) bounds -- confirm with the detector.
    expected_range: Optional[Tuple[float, float]] = None
    description: str = ""
    # NOTE(review): looks like a row position in the source data -- confirm
    # whether this is a positional index or a DataFrame label.
    row_index: Optional[int] = None
    detection_method: str = ""
    # NOTE(review): scale is not established here (0-1 vs 0-100) -- confirm.
    confidence: float = 0.0
    suggested_action: str = ""
@dataclass
class AnomalyReport:
    """Result of one anomaly-detection run over a data source.

    All fields are required and have no defaults.
    """

    source: str  # name/label of the data that was analysed
    detected_at: datetime  # timestamp recorded for the run
    total_records: int  # number of records in the analysed data
    anomalies: List[Anomaly]  # the individual anomalies that were found
    # String-keyed integer counts; presumably anomalies grouped by type
    # and/or severity -- confirm against the code that builds the report.
    summary: Dict[str, int]
class ConstructionAnomalyDetector:
"""Detect anomalies in construction data."""
# Construction-specific thresholds
COST_THRESHOLDS = {
'concrete_per_cy': (200, 800),
'steel_per_ton': (1500, 4000),
'labor_per_hour': (25, 150),
'overhead_percentage': (5, 25),
'conti...
{ copied = true; setTimeout(() => copied = false, 2000) })"
class="absolute right-3 top-3 rounded-lg bg-slate-700 px-3 py-1.5 text-xs font-bold text-slate-200 hover:bg-slate-600 transition"
x-text="copied ? '已复制' : '复制'">