|
| 1 | +""" |
| 2 | +Unit tests for validating manifest.yaml files from the connector registry against the CDK schema. |
| 3 | +
|
| 4 | +This test suite fetches all manifest-only connectors from the Airbyte connector registry, |
| 5 | +downloads their manifest.yaml files from public endpoints, and validates them against |
| 6 | +the current declarative component schema defined in the CDK. |
| 7 | +""" |
| 8 | + |
| 9 | +import json |
| 10 | +import logging |
| 11 | +from pathlib import Path |
| 12 | +from typing import Any, Dict, List, Tuple |
| 13 | +from unittest.mock import patch |
| 14 | + |
| 15 | +import pytest |
| 16 | +import requests |
| 17 | +import yaml |
| 18 | + |
| 19 | +from airbyte_cdk.sources.declarative.validators.validate_adheres_to_schema import ( |
| 20 | + ValidateAdheresToSchema, |
| 21 | +) |
| 22 | + |
| 23 | + |
# Module-level logger named after this test module.
logger = logging.getLogger(__name__)

# (connector_name, cdk_version) pairs that the schema validation test skips.
# Currently empty.
EXCLUDED_CONNECTORS: List[Tuple[str, str]] = []

# Endpoint of the OSS connector registry and URL template for a connector's manifest.
CONNECTOR_REGISTRY_URL = "https://connectors.airbyte.com/files/registries/v0/oss_registry.json"
MANIFEST_URL_TEMPLATE = "https://connectors.airbyte.com/files/metadata/airbyte/{connector_name}/latest/manifest.yaml"

# Accumulators appended to while tests run and read back by the results summary.
VALIDATION_SUCCESSES: list = []  # (connector_name, cdk_version)
VALIDATION_FAILURES: list = []  # (connector_name, cdk_version, error message)
DOWNLOAD_FAILURES: list = []  # (connector_name, error message)
| 35 | + |
| 36 | + |
def load_declarative_component_schema() -> Dict[str, Any]:
    """Load the declarative component schema shipped with the CDK.

    The schema file is located relative to this test module: four directory
    levels above it, then
    ``airbyte_cdk/sources/declarative/declarative_component_schema.yaml``.

    Returns:
        The parsed YAML document as produced by ``yaml.safe_load``.

    Raises:
        OSError: If the schema file cannot be opened.
        yaml.YAMLError: If the file content is not valid YAML.
    """
    # parents[3] is the same directory as .parent.parent.parent.parent
    base_dir = Path(__file__).resolve().parents[3]
    schema_path = base_dir / "airbyte_cdk/sources/declarative/declarative_component_schema.yaml"
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(schema_path, "r", encoding="utf-8") as file:
        return yaml.safe_load(file)
| 45 | + |
| 46 | + |
def get_manifest_only_connectors() -> List[Tuple[str, None]]:
    """Fetch the manifest-only source connectors listed in the registry.

    Downloads the registry JSON from ``CONNECTOR_REGISTRY_URL`` and keeps every
    entry under ``"sources"`` whose ``"language"`` is ``"manifest-only"``.

    Returns:
        List of ``(connector_name, None)`` tuples. ``connector_name`` is the
        entry's ``dockerRepository`` with ``"airbyte/"`` removed (e.g.
        ``"source-hubspot"``). The second element is always ``None``: the
        registry lookup does not determine a CDK version, which is only read
        later from the connector's manifest.yaml.

    Raises:
        Calls ``pytest.fail`` if the registry cannot be fetched or processed.
    """
    try:
        response = requests.get(CONNECTOR_REGISTRY_URL, timeout=30)
        response.raise_for_status()
        registry = response.json()

        manifest_connectors: List[Tuple[str, None]] = []
        for source in registry.get("sources", []):
            if source.get("language") != "manifest-only":
                continue
            # `or ""` covers both a missing key and an explicit null value, so
            # one malformed entry is skipped instead of failing the whole lookup.
            repository = source.get("dockerRepository") or ""
            connector_name = repository.replace("airbyte/", "")
            if connector_name:
                manifest_connectors.append((connector_name, None))

        return manifest_connectors
    except Exception as e:
        # Deliberately broad: any network, HTTP or parsing problem is reported
        # as a test failure carrying the underlying error message.
        pytest.fail(f"Failed to fetch connector registry: {e}")
| 70 | + |
| 71 | + |
def download_manifest(connector_name: str) -> Tuple[str, str]:
    """Fetch a connector's manifest.yaml and read the version it declares.

    Args:
        connector_name: Connector name substituted into ``MANIFEST_URL_TEMPLATE``.

    Returns:
        Tuple of ``(manifest_content, cdk_version)``: the raw response text and
        the value of the manifest's top-level ``version`` key, or ``"unknown"``
        when that key is absent.

    Raises:
        Exception: Any error raised while requesting or parsing the manifest is
            recorded in ``DOWNLOAD_FAILURES`` and then re-raised unchanged.
    """
    manifest_url = MANIFEST_URL_TEMPLATE.format(connector_name=connector_name)
    try:
        resp = requests.get(manifest_url, timeout=30)
        resp.raise_for_status()
        raw_manifest = resp.text

        # Parse here only to extract the declared version; the raw text is
        # what gets handed back to the caller.
        declared_version = yaml.safe_load(raw_manifest).get("version", "unknown")
    except Exception as err:
        DOWNLOAD_FAILURES.append((connector_name, str(err)))
        raise
    return raw_manifest, declared_version
| 93 | + |
| 94 | + |
def get_manifest_only_connector_names() -> List[str]:
    """Return only the names of the manifest-only connectors in the registry.

    Returns:
        List of connector names (e.g., "source-hubspot"), in the order
        returned by ``get_manifest_only_connectors``.
    """
    # Each registry entry is a (name, version) pair; the version is dropped.
    return [name for name, _version in get_manifest_only_connectors()]
| 104 | + |
| 105 | + |
@pytest.mark.parametrize("connector_name", get_manifest_only_connector_names())
def test_manifest_validates_against_schema(connector_name: str):
    """
    Validate one registry connector's manifest.yaml against the CDK schema.

    The manifest is downloaded, skipped if its (name, version) pair is listed
    in ``EXCLUDED_CONNECTORS``, and otherwise checked with
    ``ValidateAdheresToSchema``. Outcomes are appended to the module-level
    result lists.

    Args:
        connector_name: Name of the connector (e.g., "source-hubspot")
    """
    # The declared CDK version is only known once the manifest has been fetched.
    try:
        raw_manifest, cdk_version = download_manifest(connector_name)
    except Exception as e:
        pytest.fail(f"Failed to download manifest for {connector_name}: {e}")

    if (connector_name, cdk_version) in EXCLUDED_CONNECTORS:
        pytest.skip(
            f"Skipping {connector_name} - connector declares it is compatible with "
            f"CDK version {cdk_version} but is known to fail validation"
        )

    # download_manifest has already parsed this text once, so a YAML error is
    # not expected here; the handler is kept as a safeguard.
    try:
        parsed_manifest = yaml.safe_load(raw_manifest)
    except yaml.YAMLError as e:
        failure_message = f"Invalid YAML in manifest for {connector_name}: {e}"
        VALIDATION_FAILURES.append((connector_name, cdk_version, failure_message))
        pytest.fail(failure_message)

    validator = ValidateAdheresToSchema(schema=load_declarative_component_schema())

    try:
        validator.validate(parsed_manifest)
    except ValueError as e:
        failure_message = (
            f"Manifest validation failed for {connector_name} "
            f"(connector declares it is compatible with CDK version {cdk_version}): {e}"
        )
        # The summary list stores the bare validator error, not the full message.
        VALIDATION_FAILURES.append((connector_name, cdk_version, str(e)))
        logger.error(f"✗ {connector_name} (CDK {cdk_version}) - validation failed: {e}")
        pytest.fail(failure_message)
    else:
        VALIDATION_SUCCESSES.append((connector_name, cdk_version))
        logger.info(f"✓ {connector_name} (CDK {cdk_version}) - validation passed")
| 148 | + |
| 149 | + |
def test_schema_loads_successfully():
    """Check that the declarative component schema loads as an object schema."""
    loaded_schema = load_declarative_component_schema()

    # The loaded document must be a mapping whose top-level type is "object".
    assert isinstance(loaded_schema, dict)
    assert "type" in loaded_schema
    assert loaded_schema["type"] == "object"
| 156 | + |
| 157 | + |
def test_connector_registry_accessible():
    """Check that the registry endpoint answers 200 with a ``sources`` list."""
    resp = requests.get(CONNECTOR_REGISTRY_URL, timeout=30)
    assert resp.status_code == 200

    payload = resp.json()
    assert "sources" in payload
    assert isinstance(payload["sources"], list)
| 165 | + |
| 166 | + |
def test_manifest_only_connectors_found():
    """Check that the registry yields at least one well-formed connector name."""
    found = get_manifest_only_connectors()
    assert len(found) > 0, "No manifest-only connectors found in registry"

    # str.startswith accepts a tuple of alternatives.
    allowed_prefixes = ("source-", "destination-")
    for name, _ in found:
        assert isinstance(name, str)
        assert len(name) > 0
        assert name.startswith(allowed_prefixes)
| 176 | + |
| 177 | + |
def test_sample_manifest_download():
    """Check that the first registry connector's manifest downloads and parses.

    The test is skipped, not failed, when no connector is available or the
    download itself raises.
    """
    available = get_manifest_only_connectors()
    if not available:
        pytest.skip("No manifest-only connectors available for testing")

    connector_name, _ = available[0]
    try:
        raw_manifest, declared_version = download_manifest(connector_name)
    except Exception as e:
        pytest.skip(f"Could not download sample manifest from {connector_name}: {e}")

    # Both returned values must be non-empty strings.
    for returned_value in (raw_manifest, declared_version):
        assert isinstance(returned_value, str)
        assert len(returned_value) > 0

    # The version handed back must match what the manifest itself declares.
    parsed = yaml.safe_load(raw_manifest)
    assert isinstance(parsed, dict)
    assert "version" in parsed
    assert parsed["version"] == declared_version
| 199 | + |
| 200 | + |
def log_test_results():
    """Print a summary of the results collected in the module-level lists.

    Writes to stdout the successful validations, the validation failures and
    the download failures, followed by a totals line.
    """
    rule = "=" * 80

    print("\n" + rule)
    print("MANIFEST VALIDATION TEST RESULTS SUMMARY")
    print(rule)

    print(f"\n✓ SUCCESSFUL VALIDATIONS ({len(VALIDATION_SUCCESSES)}):")
    for connector_name, cdk_version in VALIDATION_SUCCESSES:
        print(f"  - {connector_name} (CDK {cdk_version})")

    print(f"\n✗ VALIDATION FAILURES ({len(VALIDATION_FAILURES)}):")
    for connector_name, cdk_version, error in VALIDATION_FAILURES:
        print(f"  - {connector_name} (CDK {cdk_version}): {error}")

    print(f"\n⚠ DOWNLOAD FAILURES ({len(DOWNLOAD_FAILURES)}):")
    for connector_name, error in DOWNLOAD_FAILURES:
        print(f"  - {connector_name}: {error}")

    print("\n" + rule)
    print(f"TOTAL: {len(VALIDATION_SUCCESSES)} passed, {len(VALIDATION_FAILURES)} failed, {len(DOWNLOAD_FAILURES)} download errors")
    print(rule)
| 222 | + |
| 223 | + |
def pytest_sessionfinish(session, exitstatus):
    """Print the results summary when pytest invokes this session-finish hook.

    pytest collects ``pytest_*`` hook implementations only from ``conftest.py``
    files and registered plugins, so this function is not called while it
    lives in an ordinary test module. It is kept so the hook still works if
    this module is ever registered as a plugin (the summary would then be
    printed twice); the autouse fixture below is what prints the summary
    during a normal run.

    Args:
        session: The pytest session object (unused).
        exitstatus: The exit status pytest is about to return (unused).
    """
    log_test_results()


@pytest.fixture(scope="module", autouse=True)
def _log_results_after_module():
    """Print the results summary once all tests in this module have run."""
    yield
    # Teardown of a module-scoped fixture runs after the module's last test.
    log_test_results()
0 commit comments