testing-quality
Compare original and translation side by side
🇺🇸
Original
English
🇨🇳
Translation
Chinese
Testing & Data Quality
测试与数据质量
Production testing strategies with pytest, data validation, and quality frameworks.
基于pytest、数据验证及质量框架的生产环境测试策略。
Quick Start
快速开始
python
import pytest
from unittest.mock import Mock, patch
import pandas as pd
python
import pytest
from unittest.mock import Mock, patch
import pandas as pd
Fixtures for test data
Fixtures for test data
@pytest.fixture
def sample_dataframe():
return pd.DataFrame({
"id": [1, 2, 3],
"name": ["Alice", "Bob", "Charlie"],
"amount": [100.0, 200.0, 300.0]
})
@pytest.fixture
def mock_database():
with patch("app.db.connection") as mock:
mock.query.return_value = [{"id": 1, "value": 100}]
yield mock
@pytest.fixture
def sample_dataframe():
return pd.DataFrame({
"id": [1, 2, 3],
"name": ["Alice", "Bob", "Charlie"],
"amount": [100.0, 200.0, 300.0]
})
@pytest.fixture
def mock_database():
with patch("app.db.connection") as mock:
mock.query.return_value = [{"id": 1, "value": 100}]
yield mock
Unit test with AAA pattern
Unit test with AAA pattern
class TestDataTransformer:
def test_calculates_total_correctly(self, sample_dataframe):
# Arrange
transformer = DataTransformer()
# Act
result = transformer.calculate_total(sample_dataframe)
# Assert
assert result == 600.0
def test_handles_empty_dataframe(self):
# Arrange
empty_df = pd.DataFrame()
transformer = DataTransformer()
# Act & Assert
with pytest.raises(ValueError, match="Empty dataframe"):
transformer.calculate_total(empty_df)
@pytest.mark.parametrize("input_val,expected", [
(100, 110),
(0, 0),
(-50, -55),
])
def test_apply_tax(self, input_val, expected):
result = apply_tax(input_val, rate=0.10)
assert result == expected
class TestDataTransformer:
def test_calculates_total_correctly(self, sample_dataframe):
# Arrange
transformer = DataTransformer()
# Act
result = transformer.calculate_total(sample_dataframe)
# Assert
assert result == 600.0
def test_handles_empty_dataframe(self):
# Arrange
empty_df = pd.DataFrame()
transformer = DataTransformer()
# Act & Assert
with pytest.raises(ValueError, match="Empty dataframe"):
transformer.calculate_total(empty_df)
@pytest.mark.parametrize("input_val,expected", [
(100, 110),
(0, 0),
(-50, -55),
])
def test_apply_tax(self, input_val, expected):
result = apply_tax(input_val, rate=0.10)
assert result == expected
Core Concepts
核心概念
1. Data Validation with Pydantic
1. 基于Pydantic的数据验证
python
from pydantic import BaseModel, Field, field_validator
from datetime import datetime
from typing import Optional
class DataRecord(BaseModel):
id: str = Field(..., min_length=1)
amount: float = Field(..., ge=0)
timestamp: datetime
category: Optional[str] = None
@field_validator("id")
@classmethod
def validate_id_format(cls, v):
if not v.startswith("REC-"):
raise ValueError("ID must start with 'REC-'")
return v
@field_validator("amount")
@classmethod
def round_amount(cls, v):
return round(v, 2)
python
from pydantic import BaseModel, Field, field_validator
from datetime import datetime
from typing import Optional
class DataRecord(BaseModel):
id: str = Field(..., min_length=1)
amount: float = Field(..., ge=0)
timestamp: datetime
category: Optional[str] = None
@field_validator("id")
@classmethod
def validate_id_format(cls, v):
if not v.startswith("REC-"):
raise ValueError("ID must start with 'REC-'")
return v
@field_validator("amount")
@classmethod
def round_amount(cls, v):
return round(v, 2)
Validation
Validation
def process_records(raw_data: list[dict]) -> list[DataRecord]:
valid_records = []
for item in raw_data:
try:
record = DataRecord(**item)
valid_records.append(record)
except ValidationError as e:
logger.warning(f"Invalid record: {e}")
return valid_records
def process_records(raw_data: list[dict]) -> list[DataRecord]:
valid_records = []
for item in raw_data:
try:
record = DataRecord(**item)
valid_records.append(record)
except ValidationError as e:
logger.warning(f"Invalid record: {e}")
return valid_records
2. Great Expectations
2. Great Expectations
python
import great_expectations as gx
from great_expectations.checkpoint import Checkpoint
python
import great_expectations as gx
from great_expectations.checkpoint import Checkpoint
Initialize context
Initialize context
context = gx.get_context()
context = gx.get_context()
Create expectations
Create expectations
validator = context.sources.pandas_default.read_csv("data/orders.csv")
validator = context.sources.pandas_default.read_csv("data/orders.csv")
Column expectations
Column expectations
validator.expect_column_to_exist("order_id")
validator.expect_column_values_to_not_be_null("order_id")
validator.expect_column_values_to_be_unique("order_id")
validator.expect_column_to_exist("order_id")
validator.expect_column_values_to_not_be_null("order_id")
validator.expect_column_values_to_be_unique("order_id")
Value expectations
Value expectations
validator.expect_column_values_to_be_between("amount", min_value=0, max_value=10000)
validator.expect_column_values_to_be_in_set("status", ["pending", "completed", "cancelled"])
validator.expect_column_values_to_be_between("amount", min_value=0, max_value=10000)
validator.expect_column_values_to_be_in_set("status", ["pending", "completed", "cancelled"])
Pattern matching
Pattern matching
validator.expect_column_values_to_match_regex("email", r"^[\w.-]+@[\w.-]+\.\w+$")
validator.expect_column_values_to_match_regex("email", r"^[\w.-]+@[\w.-]+\.\w+$")
Run validation
Run validation
results = validator.validate()
if not results.success:
failed_expectations = [r for r in results.results if not r.success]
raise DataQualityError(f"Validation failed: {failed_expectations}")
results = validator.validate()
if not results.success:
failed_expectations = [r for r in results.results if not r.success]
raise DataQualityError(f"Validation failed: {failed_expectations}")
3. Integration Testing
3. 集成测试
python
import pytest
from testcontainers.postgres import PostgresContainer
from sqlalchemy import create_engine
@pytest.fixture(scope="module")
def postgres_container():
"""Spin up real Postgres for integration tests."""
with PostgresContainer("postgres:16-alpine") as postgres:
yield postgres
@pytest.fixture
def db_engine(postgres_container):
"""Create engine with test database."""
engine = create_engine(postgres_container.get_connection_url())
# Setup schema
with engine.connect() as conn:
conn.execute(text("CREATE TABLE users (id SERIAL PRIMARY KEY, name TEXT)"))
conn.commit()
yield engine
# Cleanup
engine.dispose()
class TestDatabaseOperations:
def test_insert_and_query(self, db_engine):
# Arrange
repo = UserRepository(db_engine)
# Act
repo.insert(User(name="Test User"))
users = repo.get_all()
# Assert
assert len(users) == 1
assert users[0].name == "Test User"
def test_transaction_rollback(self, db_engine):
repo = UserRepository(db_engine)
with pytest.raises(IntegrityError):
repo.insert(User(name=None)) # Violates constraint
# Verify rollback
assert repo.count() == 0
python
import pytest
from testcontainers.postgres import PostgresContainer
from sqlalchemy import create_engine
@pytest.fixture(scope="module")
def postgres_container():
"""Spin up real Postgres for integration tests."""
with PostgresContainer("postgres:16-alpine") as postgres:
yield postgres
@pytest.fixture
def db_engine(postgres_container):
"""Create engine with test database."""
engine = create_engine(postgres_container.get_connection_url())
# Setup schema
with engine.connect() as conn:
conn.execute(text("CREATE TABLE users (id SERIAL PRIMARY KEY, name TEXT)"))
conn.commit()
yield engine
# Cleanup
engine.dispose()
class TestDatabaseOperations:
def test_insert_and_query(self, db_engine):
# Arrange
repo = UserRepository(db_engine)
# Act
repo.insert(User(name="Test User"))
users = repo.get_all()
# Assert
assert len(users) == 1
assert users[0].name == "Test User"
def test_transaction_rollback(self, db_engine):
repo = UserRepository(db_engine)
with pytest.raises(IntegrityError):
repo.insert(User(name=None)) # Violates constraint
# Verify rollback
assert repo.count() == 0
4. Mocking External Services
4. 外部服务Mock
python
from unittest.mock import Mock, patch, MagicMock
import responses
class TestAPIClient:
@responses.activate
def test_fetch_data_success(self):
# Mock HTTP response
responses.add(
responses.GET,
"https://api.example.com/data",
json={"items": [{"id": 1}]},
status=200
)
client = APIClient()
result = client.fetch_data()
assert len(result["items"]) == 1
@responses.activate
def test_handles_api_error(self):
responses.add(
responses.GET,
"https://api.example.com/data",
json={"error": "Server error"},
status=500
)
client = APIClient()
with pytest.raises(APIError):
client.fetch_data()
@patch("app.services.external_api")
def test_with_mock_service(self, mock_api):
mock_api.get_user.return_value = {"id": 1, "name": "Test"}
result = process_user_data(user_id=1)
mock_api.get_user.assert_called_once_with(1)
assert result["name"] == "Test"
python
from unittest.mock import Mock, patch, MagicMock
import responses
class TestAPIClient:
@responses.activate
def test_fetch_data_success(self):
# Mock HTTP response
responses.add(
responses.GET,
"https://api.example.com/data",
json={"items": [{"id": 1}]},
status=200
)
client = APIClient()
result = client.fetch_data()
assert len(result["items"]) == 1
@responses.activate
def test_handles_api_error(self):
responses.add(
responses.GET,
"https://api.example.com/data",
json={"error": "Server error"},
status=500
)
client = APIClient()
with pytest.raises(APIError):
client.fetch_data()
@patch("app.services.external_api")
def test_with_mock_service(self, mock_api):
mock_api.get_user.return_value = {"id": 1, "name": "Test"}
result = process_user_data(user_id=1)
mock_api.get_user.assert_called_once_with(1)
assert result["name"] == "Test"
Tools & Technologies
工具与技术
| Tool | Purpose | Version (2025) |
|---|---|---|
| pytest | Testing framework | 8.0+ |
| Great Expectations | Data validation | 0.18+ |
| Pydantic | Data validation | 2.5+ |
| pytest-cov | Code coverage | 4.1+ |
| testcontainers | Integration testing | 3.7+ |
| responses | HTTP mocking | 0.25+ |
| hypothesis | Property-based testing | 6.98+ |
| 工具 | 用途 | 版本(2025) |
|---|---|---|
| pytest | 测试框架 | 8.0+ |
| Great Expectations | 数据验证 | 0.18+ |
| Pydantic | 数据验证 | 2.5+ |
| pytest-cov | 代码覆盖率统计 | 4.1+ |
| testcontainers | 集成测试 | 3.7+ |
| responses | HTTP Mock | 0.25+ |
| hypothesis | 属性化测试 | 6.98+ |
Troubleshooting Guide
故障排查指南
| Issue | Symptoms | Root Cause | Fix |
|---|---|---|---|
| Flaky Tests | Random failures | Shared state, timing | Isolate tests, use fixtures |
| Slow Tests | Long test runs | No mocking, real I/O | Mock external services |
| Low Coverage | Uncovered code | Missing edge cases | Add parametrized tests |
| Test Data Issues | Inconsistent results | Hardcoded data | Use factories/fixtures |
| 问题 | 症状 | 根本原因 | 解决方案 |
|---|---|---|---|
| Flaky Tests | 随机失败 | 共享状态、时序问题 | 隔离测试,使用fixtures |
| Slow Tests | 测试运行时间长 | 未使用Mock、真实I/O操作 | Mock外部服务 |
| Low Coverage | 存在未覆盖代码 | 缺失边缘场景 | 添加参数化测试 |
| Test Data Issues | 结果不一致 | 硬编码数据 | 使用工厂类/fixtures |
Best Practices
最佳实践
python
python
✅ DO: Use fixtures for setup
✅ DO: Use fixtures for setup
@pytest.fixture
def client():
return TestClient(app)
@pytest.fixture
def client():
return TestClient(app)
✅ DO: Test edge cases
✅ DO: Test edge cases
@pytest.mark.parametrize("input_data", [None, [], {}, ""])
def test_handles_empty_input(input_data):
assert process(input_data) == default_result
@pytest.mark.parametrize("input_data", [None, [], {}, ""])
def test_handles_empty_input(input_data):
assert process(input_data) == default_result
✅ DO: Name tests descriptively
✅ DO: Name tests descriptively
def test_user_creation_fails_with_invalid_email():
...
def test_user_creation_fails_with_invalid_email():
...
✅ DO: Use marks for slow tests
✅ DO: Use marks for slow tests
@pytest.mark.slow
def test_full_pipeline():
...
@pytest.mark.slow
def test_full_pipeline():
...
❌ DON'T: Test implementation details
❌ DON'T: Test implementation details
❌ DON'T: Share state between tests
❌ DON'T: Share state between tests
❌ DON'T: Skip error path testing
❌ DON'T: Skip error path testing
Resources
资源
Skill Certification Checklist:
- Can write unit tests with pytest
- Can use fixtures and parametrization
- Can implement data validation
- Can write integration tests
- Can mock external dependencies
技能认证清单:
- 能够使用pytest编写单元测试
- 能够使用fixtures和参数化
- 能够实现数据验证
- 能够编写集成测试
- 能够Mock外部依赖