Skip to content

Commit 9465284

Browse files
tests: Add parametrized validation test for manifest-only connectors
- Fetches manifest-only connectors from the connector registry API
- Downloads manifest.yaml files from connectors.airbyte.com public endpoints
- Validates manifests against the CDK declarative component schema
- Uses (connector_name, cdk_version) exclusion tuples, where cdk_version comes from manifest.yaml
- Includes comprehensive logging of validation successes and failures
- Automatically forces re-validation when the manifest's CDK version is updated
- Provides clear error messages about CDK compatibility issues

Co-Authored-By: AJ Steers <[email protected]>
1 parent addd443 commit 9465284

File tree

1 file changed

+226
-0
lines changed

1 file changed

+226
-0
lines changed
Lines changed: 226 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,226 @@
1+
"""
2+
Unit tests for validating manifest.yaml files from the connector registry against the CDK schema.
3+
4+
This test suite fetches all manifest-only connectors from the Airbyte connector registry,
5+
downloads their manifest.yaml files from public endpoints, and validates them against
6+
the current declarative component schema defined in the CDK.
7+
"""
8+
9+
import json
10+
import logging
11+
from pathlib import Path
12+
from typing import Any, Dict, List, Tuple
13+
from unittest.mock import patch
14+
15+
import pytest
16+
import requests
17+
import yaml
18+
19+
from airbyte_cdk.sources.declarative.validators.validate_adheres_to_schema import (
20+
ValidateAdheresToSchema,
21+
)
22+
23+
24+
# Module-level logger used to report per-connector validation outcomes.
logger: logging.Logger = logging.getLogger(__name__)

# (connector_name, cdk_version) pairs that are skipped because they are known
# to fail validation at that declared CDK version.
EXCLUDED_CONNECTORS: List[Tuple[str, str]] = []

# Public endpoints: the OSS connector registry and the per-connector manifest.
CONNECTOR_REGISTRY_URL = "https://connectors.airbyte.com/files/registries/v0/oss_registry.json"
MANIFEST_URL_TEMPLATE = "https://connectors.airbyte.com/files/metadata/airbyte/{connector_name}/latest/manifest.yaml"

# Accumulators appended to while the tests run and printed by log_test_results():
#   VALIDATION_SUCCESSES -> (connector_name, cdk_version)
#   VALIDATION_FAILURES  -> (connector_name, cdk_version, error)
#   DOWNLOAD_FAILURES    -> (connector_name, error)
VALIDATION_SUCCESSES: List[Tuple[str, str]] = []
VALIDATION_FAILURES: List[Tuple[str, str, str]] = []
DOWNLOAD_FAILURES: List[Tuple[str, str]] = []
35+
36+
37+
def load_declarative_component_schema() -> Dict[str, Any]:
    """Load the declarative component schema from the CDK.

    The schema location is computed from this file's resolved path by taking
    four ``.parent`` steps and appending
    ``airbyte_cdk/sources/declarative/declarative_component_schema.yaml``.

    Returns:
        The schema parsed from YAML.

    Raises:
        OSError: If the schema file cannot be opened.
        yaml.YAMLError: If the schema file is not valid YAML.
    """
    schema_path = (
        Path(__file__).resolve().parent.parent.parent.parent
        / "airbyte_cdk/sources/declarative/declarative_component_schema.yaml"
    )
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default text encoding.
    with open(schema_path, "r", encoding="utf-8") as file:
        return yaml.safe_load(file)
45+
46+
47+
def get_manifest_only_connectors() -> List[Tuple[str, None]]:
    """Fetch manifest-only connectors from the registry.

    Downloads the registry JSON from ``CONNECTOR_REGISTRY_URL`` and keeps every
    entry under ``sources`` whose ``language`` is ``"manifest-only"``. The
    connector name is the entry's ``dockerRepository`` with ``"airbyte/"``
    removed; entries without a usable repository name are ignored.

    Returns:
        List of tuples ``(connector_name, None)``. The second slot is a
        placeholder: the CDK version is determined later from the
        manifest.yaml file itself.

    Raises:
        Calls ``pytest.fail`` if the registry cannot be fetched or decoded.
    """
    # Only the network round-trip and JSON decoding are reported as a fetch
    # failure; errors while interpreting the payload surface as themselves.
    try:
        response = requests.get(CONNECTOR_REGISTRY_URL, timeout=30)
        response.raise_for_status()
        registry = response.json()
    except Exception as e:
        pytest.fail(f"Failed to fetch connector registry: {e}")

    manifest_connectors = []
    # `or []` / `or ""` guard against keys that are present but set to null.
    for source in registry.get("sources") or []:
        if source.get("language") != "manifest-only":
            continue
        repository = source.get("dockerRepository") or ""
        connector_name = repository.replace("airbyte/", "")
        if connector_name:
            manifest_connectors.append((connector_name, None))

    return manifest_connectors
70+
71+
72+
def download_manifest(connector_name: str) -> Tuple[str, str]:
    """Download manifest.yaml for a connector.

    Args:
        connector_name: Name substituted into ``MANIFEST_URL_TEMPLATE``.

    Returns:
        Tuple of ``(manifest_content, cdk_version)``. ``cdk_version`` is the
        manifest's ``version`` field converted to ``str``, or ``"unknown"``
        when the field is missing or null, the manifest is not a mapping, or
        the content is not parseable YAML. Unparseable content is still
        returned so the caller can report it as invalid YAML rather than as a
        download problem.

    Raises:
        Exception: Any error raised while performing the HTTP request is
            recorded in ``DOWNLOAD_FAILURES`` and re-raised unchanged.
    """
    url = MANIFEST_URL_TEMPLATE.format(connector_name=connector_name)
    # Only genuine transport/HTTP errors count as download failures.
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        manifest_content = response.text
    except Exception as e:
        DOWNLOAD_FAILURES.append((connector_name, str(e)))
        raise

    # Version extraction is best-effort: a malformed manifest must not be
    # misreported as a failed download.
    try:
        manifest_dict = yaml.safe_load(manifest_content)
    except yaml.YAMLError:
        manifest_dict = None

    version = manifest_dict.get("version") if isinstance(manifest_dict, dict) else None
    # YAML may parse an unquoted version as a number; normalise to str so the
    # declared return type holds.
    cdk_version = "unknown" if version is None else str(version)

    return manifest_content, cdk_version
93+
94+
95+
def get_manifest_only_connector_names() -> List[str]:
    """Return just the names of the manifest-only connectors in the registry.

    Delegates to ``get_manifest_only_connectors()`` and drops the second
    element of each returned tuple.

    Returns:
        List of connector names (e.g., "source-hubspot")
    """
    return [name for name, _placeholder_version in get_manifest_only_connectors()]
104+
105+
106+
@pytest.mark.parametrize("connector_name", get_manifest_only_connector_names())
def test_manifest_validates_against_schema(connector_name: str):
    """Check one registry connector's manifest.yaml against the CDK schema.

    Outcomes are appended to ``VALIDATION_SUCCESSES`` / ``VALIDATION_FAILURES``.
    The test is skipped when ``(connector_name, cdk_version)`` is listed in
    ``EXCLUDED_CONNECTORS``.

    Args:
        connector_name: Name of the connector (e.g., "source-hubspot")
    """
    # The manifest has to be fetched first: the CDK version used for the
    # exclusion lookup comes from the manifest itself.
    try:
        raw_manifest, declared_version = download_manifest(connector_name)
    except Exception as download_error:
        pytest.fail(f"Failed to download manifest for {connector_name}: {download_error}")

    exclusion_key = (connector_name, declared_version)
    if exclusion_key in EXCLUDED_CONNECTORS:
        pytest.skip(
            f"Skipping {connector_name} - connector declares it is compatible with "
            f"CDK version {declared_version} but is known to fail validation"
        )

    try:
        parsed_manifest = yaml.safe_load(raw_manifest)
    except yaml.YAMLError as yaml_error:
        invalid_yaml_msg = f"Invalid YAML in manifest for {connector_name}: {yaml_error}"
        VALIDATION_FAILURES.append((connector_name, declared_version, invalid_yaml_msg))
        pytest.fail(invalid_yaml_msg)

    component_schema = load_declarative_component_schema()
    schema_validator = ValidateAdheresToSchema(schema=component_schema)

    try:
        schema_validator.validate(parsed_manifest)
        VALIDATION_SUCCESSES.append((connector_name, declared_version))
        logger.info(f"✓ {connector_name} (CDK {declared_version}) - validation passed")
    except ValueError as validation_error:
        failure_msg = (
            f"Manifest validation failed for {connector_name} "
            f"(connector declares it is compatible with CDK version {declared_version}): {validation_error}"
        )
        VALIDATION_FAILURES.append((connector_name, declared_version, str(validation_error)))
        logger.error(
            f"✗ {connector_name} (CDK {declared_version}) - validation failed: {validation_error}"
        )
        pytest.fail(failure_msg)
148+
149+
150+
def test_schema_loads_successfully():
    """Verify the declarative component schema loads as a dict whose ``type`` is ``"object"``."""
    loaded_schema = load_declarative_component_schema()

    assert isinstance(loaded_schema, dict)
    assert "type" in loaded_schema
    assert loaded_schema["type"] == "object"
156+
157+
158+
def test_connector_registry_accessible():
    """Verify the registry URL answers HTTP 200 with JSON containing a ``sources`` list."""
    registry_response = requests.get(CONNECTOR_REGISTRY_URL, timeout=30)
    assert registry_response.status_code == 200

    payload = registry_response.json()
    assert "sources" in payload
    assert isinstance(payload["sources"], list)
165+
166+
167+
def test_manifest_only_connectors_found():
    """Verify the registry yields at least one manifest-only connector with a well-formed name."""
    found = get_manifest_only_connectors()
    assert len(found) > 0, "No manifest-only connectors found in registry"

    # Every name must be a non-empty string carrying one of these prefixes.
    expected_prefixes = ("source-", "destination-")
    for name, _placeholder_version in found:
        assert isinstance(name, str)
        assert len(name) > 0
        assert name.startswith(expected_prefixes)
176+
177+
178+
def test_sample_manifest_download():
    """Verify that the first manifest-only connector's manifest can be downloaded and parsed.

    The test is skipped when the registry lists no manifest-only connectors or
    when the download itself raises.
    """
    available = get_manifest_only_connectors()
    if not available:
        pytest.skip("No manifest-only connectors available for testing")

    sample_name, _ = available[0]
    try:
        content, declared_version = download_manifest(sample_name)
    except Exception as download_error:
        pytest.skip(f"Could not download sample manifest from {sample_name}: {download_error}")

    # Both returned values must be non-empty strings.
    assert isinstance(content, str)
    assert len(content) > 0
    assert isinstance(declared_version, str)
    assert len(declared_version) > 0

    # The reported version must match what the manifest itself declares.
    parsed = yaml.safe_load(content)
    assert isinstance(parsed, dict)
    assert "version" in parsed
    assert parsed["version"] == declared_version
199+
200+
201+
def log_test_results():
    """Print a summary of validation successes, validation failures and download failures.

    Reads the module-level ``VALIDATION_SUCCESSES``, ``VALIDATION_FAILURES``
    and ``DOWNLOAD_FAILURES`` lists and writes the report to stdout.
    """
    banner = "=" * 80

    print("\n" + banner)
    print("MANIFEST VALIDATION TEST RESULTS SUMMARY")
    print(banner)

    print(f"\n✓ SUCCESSFUL VALIDATIONS ({len(VALIDATION_SUCCESSES)}):")
    for name, version in VALIDATION_SUCCESSES:
        print(f" - {name} (CDK {version})")

    print(f"\n✗ VALIDATION FAILURES ({len(VALIDATION_FAILURES)}):")
    for name, version, reason in VALIDATION_FAILURES:
        print(f" - {name} (CDK {version}): {reason}")

    print(f"\n⚠ DOWNLOAD FAILURES ({len(DOWNLOAD_FAILURES)}):")
    for name, reason in DOWNLOAD_FAILURES:
        print(f" - {name}: {reason}")

    passed = len(VALIDATION_SUCCESSES)
    failed = len(VALIDATION_FAILURES)
    download_errors = len(DOWNLOAD_FAILURES)

    print("\n" + banner)
    print(f"TOTAL: {passed} passed, {failed} failed, {download_errors} download errors")
    print(banner)
222+
223+
224+
@pytest.fixture(scope="module", autouse=True)
def _log_results_after_module():
    """Print the validation summary after the tests in this module have run.

    pytest collects hook implementations such as ``pytest_sessionfinish`` from
    ``conftest.py`` files and registered plugins, not from ordinary test
    modules, so the hook below is not invoked while it lives here. This
    autouse, module-scoped fixture produces the summary from within the module.
    """
    yield
    log_test_results()


def pytest_sessionfinish(session, exitstatus):
    """Called after whole test run finished, right before returning the exit status to the system.

    Kept so the summary is still printed if this function is moved to a
    ``conftest.py`` or the module is registered as a plugin. In that case the
    fixture above would print a second copy and should be removed.

    Args:
        session: The pytest session object (unused).
        exitstatus: The exit status pytest is about to return (unused).
    """
    log_test_results()

0 commit comments

Comments
 (0)