Loading...
Loading...
Compare original and translation side by side
references/vision-requests.mdreferences/visionkit-scanner.mdreferences/vision-requests.mdreferences/visionkit-scanner.md| Aspect | Modern (iOS 18+) | Legacy |
|---|---|---|
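| Pattern | `try await request.perform(on: image)` | `VNImageRequestHandler` + `perform([request])` |
| Request types | Swift types — structs and classes (e.g. `RecognizeTextRequest`) | ObjC classes (e.g. `VNRecognizeTextRequest`) |
| Concurrency | Native async/await | Completion handlers or synchronous |
| Observations | Typed return values | Cast `request.results` to the expected observation type |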
| Availability | iOS 18+ / macOS 15+ | iOS 11+ |
ImageProcessingRequestperform(on:orientation:)CGImageCIImageCVPixelBufferCMSampleBufferDataURLTrackObjectRequestTrackRectangleRequestDetectTrajectoriesRequest| 维度 | 现代版(iOS 18+) | 传统版 |
|---|---|---|
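| 模式 | `try await request.perform(on: image)` | `VNImageRequestHandler` + `perform([request])` |
| 请求类型 | Swift类型——结构体和类(如 `RecognizeTextRequest`) | OC类(如 `VNRecognizeTextRequest`) |
| 并发机制 | 原生async/await | 完成回调或同步 |
| 观测结果 | 强类型返回值 | 从 `request.results` 转换为具体的观测类型 |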
| 适配版本 | iOS 18+ / macOS 15+ | iOS 11+ |
ImageProcessingRequestperform(on:orientation:)CGImageCIImageCVPixelBufferCMSampleBufferDataURLTrackObjectRequestTrackRectangleRequestDetectTrajectoriesRequestperform(on:)import Vision
func recognizeText(in image: CGImage) async throws -> [String] {
var request = RecognizeTextRequest()
request.recognitionLevel = .accurate
request.recognitionLanguages = [Locale.Language(identifier: "en-US")]
let observations = try await request.perform(on: image)
return observations.compactMap { observation in
observation.topCandidates(1).first?.string
}
}perform(on:)import Vision
func recognizeText(in image: CGImage) async throws -> [String] {
var request = RecognizeTextRequest()
request.recognitionLevel = .accurate
request.recognitionLanguages = [Locale.Language(identifier: "en-US")]
let observations = try await request.perform(on: image)
return observations.compactMap { observation in
observation.topCandidates(1).first?.string
}
}VNImageRequestHandlerimport Vision
func recognizeTextLegacy(in image: CGImage) throws -> [String] {
var recognized: [String] = []
let request = VNRecognizeTextRequest { request, error in
guard let observations = request.results as? [VNRecognizedTextObservation] else { return }
recognized = observations.compactMap { $0.topCandidates(1).first?.string }
}
request.recognitionLevel = .accurate
let handler = VNImageRequestHandler(cgImage: image)
try handler.perform([request])
return recognized
}VNImageRequestHandlerimport Vision
func recognizeTextLegacy(in image: CGImage) throws -> [String] {
var recognized: [String] = []
let request = VNRecognizeTextRequest { request, error in
guard let observations = request.results as? [VNRecognizedTextObservation] else { return }
recognized = observations.compactMap { $0.topCandidates(1).first?.string }
}
request.recognitionLevel = .accurate
let handler = VNImageRequestHandler(cgImage: image)
try handler.perform([request])
return recognized
}var request = RecognizeTextRequest()
request.recognitionLevel = .accurate // .fast for real-time
request.recognitionLanguages = [
Locale.Language(identifier: "en-US"),
Locale.Language(identifier: "fr-FR"),
]
request.usesLanguageCorrection = true
request.customWords = ["SwiftUI", "Xcode"] // domain-specific terms
let observations = try await request.perform(on: cgImage)
for observation in observations {
guard let candidate = observation.topCandidates(1).first else { continue }
let text = candidate.string
let confidence = candidate.confidence // 0.0 ... 1.0
let bounds = observation.boundingBox // normalized coordinates
}var request = RecognizeTextRequest()
request.recognitionLevel = .accurate // .fast适用于实时场景
request.recognitionLanguages = [
Locale.Language(identifier: "en-US"),
Locale.Language(identifier: "fr-FR"),
]
request.usesLanguageCorrection = true
request.customWords = ["SwiftUI", "Xcode"] // 领域特定术语
let observations = try await request.perform(on: cgImage)
for observation in observations {
guard let candidate = observation.topCandidates(1).first else { continue }
let text = candidate.string
let confidence = candidate.confidence // 0.0 ... 1.0
let bounds = observation.boundingBox // 归一化坐标
}let request = VNRecognizeTextRequest()
request.recognitionLevel = .accurate
request.recognitionLanguages = ["en-US", "fr-FR"]
request.usesLanguageCorrection = trueLocale.Language.accurate.fastlet request = VNRecognizeTextRequest()
request.recognitionLevel = .accurate
request.recognitionLanguages = ["en-US", "fr-FR"]
request.usesLanguageCorrection = trueLocale.Language.accurate.fast// Modern API
let faceRequest = DetectFaceRectanglesRequest()
let faces = try await faceRequest.perform(on: cgImage)
for face in faces {
let boundingBox = face.boundingBox // normalized CGRect
let roll = face.roll // Measurement<UnitAngle>
let yaw = face.yaw // Measurement<UnitAngle>
}
// Landmarks (eyes, nose, mouth contours)
var landmarkRequest = DetectFaceLandmarksRequest()
let landmarkFaces = try await landmarkRequest.perform(on: cgImage)
for face in landmarkFaces {
let landmarks = face.landmarks
let leftEye = landmarks?.leftEye?.normalizedPoints
let nose = landmarks?.nose?.normalizedPoints
}// 现代API
let faceRequest = DetectFaceRectanglesRequest()
let faces = try await faceRequest.perform(on: cgImage)
for face in faces {
let boundingBox = face.boundingBox // 归一化CGRect
let roll = face.roll // Measurement<UnitAngle>
let yaw = face.yaw // Measurement<UnitAngle>
}
// 面部特征点(眼睛、鼻子、嘴巴轮廓)
var landmarkRequest = DetectFaceLandmarksRequest()
let landmarkFaces = try await landmarkRequest.perform(on: cgImage)
for face in landmarkFaces {
let landmarks = face.landmarks
let leftEye = landmarks?.leftEye?.normalizedPoints
let nose = landmarks?.nose?.normalizedPoints
}func convertToUIKit(_ rect: CGRect, imageHeight: CGFloat) -> CGRect {
CGRect(
x: rect.origin.x,
y: imageHeight - rect.origin.y - rect.height,
width: rect.width,
height: rect.height
)
}func convertToUIKit(_ rect: CGRect, imageHeight: CGFloat) -> CGRect {
CGRect(
x: rect.origin.x,
y: imageHeight - rect.origin.y - rect.height,
width: rect.width,
height: rect.height
)
}var request = DetectBarcodesRequest()
request.symbologies = [.qr, .ean13, .code128, .pdf417]
let barcodes = try await request.perform(on: cgImage)
for barcode in barcodes {
let payload = barcode.payloadString // decoded content
let symbology = barcode.symbology // .qr, .ean13, etc.
let bounds = barcode.boundingBox // normalized rect
}.qr.aztec.pdf417.dataMatrix.ean8.ean13.code39.code128.upce.itf14var request = DetectBarcodesRequest()
request.symbologies = [.qr, .ean13, .code128, .pdf417]
let barcodes = try await request.perform(on: cgImage)
for barcode in barcodes {
let payload = barcode.payloadString // 解码内容
let symbology = barcode.symbology // .qr, .ean13等
let bounds = barcode.boundingBox // 归一化矩形
}.qr.aztec.pdf417.dataMatrix.ean8.ean13.code39.code128.upce.itf14RecognizeDocumentsRequestDocumentObservationContainervar request = RecognizeDocumentsRequest()
let documents = try await request.perform(on: cgImage)
for observation in documents {
let container = observation.document
// Full text content
let fullText = container.text
// Structured access to paragraphs
for paragraph in container.paragraphs {
let paragraphText = paragraph.text
}
// Tables and lists
for table in container.tables { /* structured table data */ }
for list in container.lists { /* structured list data */ }
// Embedded barcodes detected within the document
for barcode in container.barcodes { /* barcode data */ }
// Document title if detected
if let title = container.title { print(title) }
}VNDocumentCameraViewControllerRecognizeDocumentsRequestContainerDocumentObservationvar request = RecognizeDocumentsRequest()
let documents = try await request.perform(on: cgImage)
for observation in documents {
let container = observation.document
// 完整文本内容
let fullText = container.text
// 结构化访问段落
for paragraph in container.paragraphs {
let paragraphText = paragraph.text
}
// 表格和列表
for table in container.tables { /* 结构化表格数据 */ }
for list in container.lists { /* 结构化列表数据 */ }
// 文档中嵌入的条形码
for barcode in container.barcodes { /* 条形码数据 */ }
// 检测到的文档标题
if let title = container.title { print(title) }
}VNDocumentCameraViewControllervar request = GeneratePersonSegmentationRequest()
request.qualityLevel = .accurate // .balanced, .fast
let mask = try await request.perform(on: cgImage)
// mask is a PersonSegmentationObservation with a pixelBuffer property
let maskBuffer = mask.pixelBuffer
// Apply mask using Core Image: CIFilter.blendWithMask()var request = GeneratePersonSegmentationRequest()
request.qualityLevel = .accurate // .balanced, .fast
let mask = try await request.perform(on: cgImage)
// mask为PersonSegmentationObservation对象,包含pixelBuffer属性
let maskBuffer = mask.pixelBuffer
// 使用Core Image应用蒙版:CIFilter.blendWithMask()let request = VNGeneratePersonSegmentationRequest()
request.qualityLevel = .accurate // .balanced, .fast
request.outputPixelFormat = kCVPixelFormatType_OneComponent8
let handler = VNImageRequestHandler(cgImage: cgImage)
try handler.perform([request])
guard let mask = request.results?.first?.pixelBuffer else { return }
// Apply mask using Core Image: CIFilter.blendWithMask().accurate.balanced.fastlet request = VNGeneratePersonSegmentationRequest()
request.qualityLevel = .accurate // .balanced, .fast
request.outputPixelFormat = kCVPixelFormatType_OneComponent8
let handler = VNImageRequestHandler(cgImage: cgImage)
try handler.perform([request])
guard let mask = request.results?.first?.pixelBuffer else { return }
// 使用Core Image应用蒙版:CIFilter.blendWithMask().accurate.balanced.fast// Modern API (iOS 18+)
let request = GeneratePersonInstanceMaskRequest()
let observation = try await request.perform(on: cgImage)
let indices = observation.allInstances
for index in indices {
let mask = try observation.generateMask(forInstances: IndexSet(integer: index))
// mask is a CVPixelBuffer with only this person visible
}// Legacy API (iOS 17+)
let request = VNGeneratePersonInstanceMaskRequest()
let handler = VNImageRequestHandler(cgImage: cgImage)
try handler.perform([request])
guard let result = request.results?.first else { return }
let indices = result.allInstances
for index in indices {
let instanceMask = try result.generateMaskedImage(
ofInstances: IndexSet(integer: index),
from: handler,
croppedToInstancesExtent: false
)
}references/vision-requests.md// 现代API(iOS 18+)
let request = GeneratePersonInstanceMaskRequest()
let observation = try await request.perform(on: cgImage)
let indices = observation.allInstances
for index in indices {
let mask = try observation.generateMask(forInstances: IndexSet(integer: index))
// mask为仅包含当前人物的CVPixelBuffer
}// 传统API(iOS 17+)
let request = VNGeneratePersonInstanceMaskRequest()
let handler = VNImageRequestHandler(cgImage: cgImage)
try handler.perform([request])
guard let result = request.results?.first else { return }
let indices = result.allInstances
for index in indices {
let instanceMask = try result.generateMaskedImage(
ofInstances: IndexSet(integer: index),
from: handler,
croppedToInstancesExtent: false
)
}references/vision-requests.mdTrackObjectRequestImageProcessingRequestStatefulRequest// Initialize with a detected object's bounding box
let initialObservation = DetectedObjectObservation(boundingBox: detectedRect)
var request = TrackObjectRequest(observation: initialObservation)
request.trackingLevel = .accurate
// For each video frame:
let results = try await request.perform(on: pixelBuffer)
if let tracked = results.first {
let updatedBounds = tracked.boundingBox
let confidence = tracked.confidence
}TrackObjectRequestImageProcessingRequestStatefulRequest// 基于检测到的物体边界框初始化
let initialObservation = DetectedObjectObservation(boundingBox: detectedRect)
var request = TrackObjectRequest(observation: initialObservation)
request.trackingLevel = .accurate
// 处理每一帧视频:
let results = try await request.perform(on: pixelBuffer)
if let tracked = results.first {
let updatedBounds = tracked.boundingBox
let confidence = tracked.confidence
}let trackRequest = VNTrackObjectRequest(detectedObjectObservation: initialObservation)
trackRequest.trackingLevel = .accurate
let sequenceHandler = VNSequenceRequestHandler()
// For each frame:
try sequenceHandler.perform([trackRequest], on: pixelBuffer)
if let result = trackRequest.results?.first {
let updatedBounds = result.boundingBox
trackRequest.inputObservation = result
}let trackRequest = VNTrackObjectRequest(detectedObjectObservation: initialObservation)
trackRequest.trackingLevel = .accurate
let sequenceHandler = VNSequenceRequestHandler()
// 处理每一帧:
try sequenceHandler.perform([trackRequest], on: pixelBuffer)
if let result = trackRequest.results?.first {
let updatedBounds = result.boundingBox
trackRequest.inputObservation = result
}references/vision-requests.md| Request | Purpose |
|---|---|
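| `ClassifyImageRequest` | Classify scene content (outdoor, food, animal, etc.) |
| `GenerateAttentionBasedSaliencyImageRequest` | Heat map of where viewers focus attention |
| `GenerateObjectnessBasedSaliencyImageRequest` | Heat map of object-like regions |
| `GenerateForegroundInstanceMaskRequest` | Foreground object segmentation (not person-specific) |
| `DetectRectanglesRequest` | Detect rectangular shapes (documents, cards, screens) |
| `DetectHorizonRequest` | Detect horizon angle for auto-leveling photos |
| `DetectHumanBodyPoseRequest` | Detect body joints (shoulders, elbows, knees) |
| `DetectHumanBodyPose3DRequest` | 3D human body pose estimation |
| `DetectHumanHandPoseRequest` | Detect hand joints and finger positions |
| `DetectAnimalBodyPoseRequest` | Detect animal body joint positions |
| `DetectFaceCaptureQualityRequest` | Face capture quality scoring (0–1) for photo selection |
| `TrackRectangleRequest` | Track rectangular objects across video frames |
| `TrackOpticalFlowRequest` | Optical flow between video frames |
| `DetectTrajectoriesRequest` | Detect object trajectories in video |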
references/vision-requests.md| 请求 | 用途 |
|---|---|
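| `ClassifyImageRequest` | 分类场景内容(户外、食物、动物等) |
| `GenerateAttentionBasedSaliencyImageRequest` | 生成用户注意力热力图 |
| `GenerateObjectnessBasedSaliencyImageRequest` | 生成类物体区域热力图 |
| `GenerateForegroundInstanceMaskRequest` | 前景物体分割(非人物特定) |
| `DetectRectanglesRequest` | 检测矩形形状(文档、卡片、屏幕) |
| `DetectHorizonRequest` | 检测地平线角度,用于照片自动校平 |
| `DetectHumanBodyPoseRequest` | 检测人体关节(肩膀、手肘、膝盖) |
| `DetectHumanBodyPose3DRequest` | 3D人体姿态估计 |
| `DetectHumanHandPoseRequest` | 检测手部关节和手指位置 |
| `DetectAnimalBodyPoseRequest` | 检测动物身体关节位置 |
| `DetectFaceCaptureQualityRequest` | 人脸拍摄质量评分(0–1),用于照片筛选 |
| `TrackRectangleRequest` | 跟踪视频帧中的矩形物体 |
| `TrackOpticalFlowRequest` | 视频帧之间的光流跟踪 |
| `DetectTrajectoriesRequest` | 检测视频中的物体运动轨迹 |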
// Modern API (iOS 18+)
let model = try MLModel(contentsOf: modelURL)
let request = CoreMLRequest(model: .init(model))
let results = try await request.perform(on: cgImage)
// Classification model
if let classification = results.first as? ClassificationObservation {
let label = classification.identifier
let confidence = classification.confidence
}// Legacy API
let vnModel = try VNCoreMLModel(for: model)
let request = VNCoreMLRequest(model: vnModel) { request, error in
guard let results = request.results as? [VNClassificationObservation] else { return }
let topResult = results.first
}
let handler = VNImageRequestHandler(cgImage: cgImage)
try handler.perform([request])coreml// 现代API(iOS 18+)
let model = try MLModel(contentsOf: modelURL)
let request = CoreMLRequest(model: .init(model))
let results = try await request.perform(on: cgImage)
// 分类模型
if let classification = results.first as? ClassificationObservation {
let label = classification.identifier
let confidence = classification.confidence
}// 传统API
let vnModel = try VNCoreMLModel(for: model)
let request = VNCoreMLRequest(model: vnModel) { request, error in
guard let results = request.results as? [VNClassificationObservation] else { return }
let topResult = results.first
}
let handler = VNImageRequestHandler(cgImage: cgImage)
try handler.perform([request])coremlDataScannerViewControllerreferences/visionkit-scanner.mdDataScannerViewControllerreferences/visionkit-scanner.mdimport VisionKit
// Check availability (requires A12+ chip and camera)
guard DataScannerViewController.isSupported,
DataScannerViewController.isAvailable else { return }
let scanner = DataScannerViewController(
recognizedDataTypes: [
.text(languages: ["en"]),
.barcode(symbologies: [.qr, .ean13])
],
qualityLevel: .balanced,
recognizesMultipleItems: true,
isHighFrameRateTrackingEnabled: true,
isHighlightingEnabled: true
)
scanner.delegate = self
present(scanner, animated: true) {
try? scanner.startScanning()
}import VisionKit
// 检查可用性(需要A12+芯片和相机权限)
guard DataScannerViewController.isSupported,
DataScannerViewController.isAvailable else { return }
let scanner = DataScannerViewController(
recognizedDataTypes: [
.text(languages: ["en"]),
.barcode(symbologies: [.qr, .ean13])
],
qualityLevel: .balanced,
recognizesMultipleItems: true,
isHighFrameRateTrackingEnabled: true,
isHighlightingEnabled: true
)
scanner.delegate = self
present(scanner, animated: true) {
try? scanner.startScanning()
}DataScannerViewControllerUIViewControllerRepresentablereferences/visionkit-scanner.mdDataScannerViewControllerUIViewControllerRepresentablereferences/visionkit-scanner.mdVNImageRequestHandlerperform(on:)VNImageRectForNormalizedRect(_:_:_:).accurate.fast.accurateconfidenceVNImageRequestHandlerVNSequenceRequestHandlerDataScannerViewControllerisSupportedisAvailableisAvailableVNImageRequestHandlerperform(on:)VNImageRectForNormalizedRect(_:_:_:).accurate.fast.accurateconfidenceVNImageRequestHandlerVNSequenceRequestHandlerDataScannerViewControllerisSupportedisAvailableisAvailable.fast.accurateDataScannerViewControllerNSCameraUsageDescriptionVNSequenceRequestHandler.fast.accurateDataScannerViewControllerNSCameraUsageDescriptionVNSequenceRequestHandlerreferences/vision-requests.mdreferences/visionkit-scanner.mdreferences/vision-requests.mdreferences/visionkit-scanner.md