Skip to content

"C" dbf fields starting or ending in whitespace have that whitespace removed #418

@JamesParrott

Description

@JamesParrott

PyShp Version

Since ever

Python Version

all

Your code

Set `ascii_printable = string.ascii_letters + string.digits + string.punctuation + " "` in hypothesis_tests.py, then run:



pytest -k test_dbf_reader_writer_roundtrip -vv

Full stacktrace

__________________________________________ test_dbf_reader_writer_roundtrip ___________________________________________

    @pytest.mark.hypothesis
>   @given(fields_and_records=dbf_fields_and_records())
                   ^^^

tests\hypothesis_tests.py:611:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

fields_and_records = ([{'decimal': 0, 'field_type': 'C', 'name': '0', 'size': 1}, {'decimal': 0, 'field_type': 'C', 'name': '0', 'size': 1}, {'decimal': 0, 'field_type': 'C', 'name': '0', 'size': 1}], [('', '', ' ')])

    @pytest.mark.hypothesis
    @given(fields_and_records=dbf_fields_and_records())
    def test_dbf_reader_writer_roundtrip(fields_and_records)-> None:
        fields, records = fields_and_records
        stream = io.BytesIO()
        with shp.DbfWriter(dbf=stream) as dbf_w:
            for field in fields:
                dbf_w.field(**field)
            for record in records:
                dbf_w.record(*record)
        stream.seek(0)
        with shp.DbfReader(dbf=stream) as r:
            actual_fields = iter(r.fields)
            next(actual_fields) # skip deletion flag
            for f_r, f_w in itertools.zip_longest(actual_fields, fields):
                actual_field_dict = f_r._asdict()
                for k in ("field_type", "size", "decimal"):
                    assert actual_field_dict[k] == f_w[k], f"{k=}, {actual_field_dict[k]=}, {f_w[k]=}"
            for exp_rec, actual_rec in itertools.zip_longest(records, r.records()):
                for expected, actual, (name, field_type, size, decimal) in itertools.zip_longest(
                    exp_rec, actual_rec, fields):
                    if field_type == "D":
                        if isinstance(expected, datetime.date):
                            expected = expected.strftime("%Y%m%d")
                        if isinstance(actual, datetime.date):
                            actual = actual.strftime("%Y%m%d")
>                   assert actual == expected
E                   AssertionError: assert '' == ' '
E
E                     Strings contain only whitespace, escaping them using repr()
E                     - ' '
E                     ?  -
E                     + ''
E                   Falsifying example: test_dbf_reader_writer_roundtrip(
E                       fields_and_records=([{'name': '0',
E                          'field_type': 'C',
E                          'size': 1,
E                          'decimal': 0},
E                         {'name': '0', 'field_type': 'C', 'size': 1, 'decimal': 0},
E                         {'name': '0', 'field_type': 'C', 'size': 1, 'decimal': 0}],
E                        [(
E                             '',  # or any other generated value
E                             '',  # or any other generated value
E                             ' ',
E                         )]),
E                   )

tests\hypothesis_tests.py:636: AssertionError

Other notes

Why does PyShp strip leading and trailing whitespace from the user's string data before even checking whether truncation is required?

Not a huge issue; it's just an annoyance while testing.

value = value.strip().rstrip(

Metadata

Metadata

Assignees

No one assigned

    Labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions