Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions airbyte_cdk/sources/file_based/file_types/csv_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ def _get_headers(self, fp: IOBase, config_format: CsvFormat, dialect_name: str)
# Then read the header
self._skip_rows(fp, config_format.skip_rows_before_header)
reader = csv.reader(fp, dialect=dialect_name) # type: ignore
- headers = list(next(reader))
+ headers = [header.strip() for header in next(reader)]

fp.seek(0)
return headers
Expand Down Expand Up @@ -209,7 +209,7 @@ async def infer_schema(
failure_type=FailureType.config_error,
)
schema = {
- header.strip(): {"type": type_inferred.infer()}
+ header: {"type": type_inferred.infer()}
for header, type_inferred in type_inferrer_by_field.items()
}
data_generator.close()
Expand Down
14 changes: 14 additions & 0 deletions unit_tests/sources/file_based/file_types/test_csv_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -658,6 +658,20 @@ def test_read_data_with_encoding_error(self) -> None:
assert "encoding" in ate.value.message
assert self._csv_reader._get_headers.called

def test_read_data_strips_leading_and_trailing_whitespace_in_header(self) -> None:
self._stream_reader.open_file.return_value = (
CsvFileBuilder()
.with_data(
[
"header1 ,\theader2",
"1,2",
]
)
.build()
)
data_generator = self._read_data()
assert list(data_generator) == [{"header1": "1", "header2": "2"}]

def _read_data(self) -> Generator[Dict[str, str], None, None]:
data_generator = self._csv_reader.read_data(
self._config,
Expand Down
Loading