Source code for colander_data_converter.exporters.csv

  1import csv
  2from typing import List, get_args, Dict, Set, TextIO
  3
  4from pydantic import BaseModel
  5
  6from colander_data_converter.base.common import ObjectReference
  7from colander_data_converter.base.models import ColanderFeed
  8from colander_data_converter.exporters.exporter import BaseExporter
  9
 10

[docs]
 11class CsvExporter(BaseExporter):
 12    """
 13    A class to export entities from a ColanderFeed to CSV format.
 14
 15    This exporter filters entities by type and exports their fields to a CSV file,
 16    excluding certain internal fields and object references.
 17    """
 18
 19    excluded_fields: List[str] = ["colander_internal_type", "attributes"]
 20    """Fields to exclude from CSV export"""
 21

[docs]
 22    def __init__(self, feed: ColanderFeed, entity_type: type[BaseModel]):
 23        """
 24        Initialize the CSV exporter.
 25
 26        Args:
 27            feed: The feed containing entities to export
 28            entity_type: The Pydantic model type to filter entities by
 29
 30        Raises:
 31            AssertionError: If :py:obj:`entity_type` is not a subclass of :py:class:`pydantic.BaseModel` or
 32                :py:obj:`feed` is not a :py:class:`~colander_data_converter.base.models.ColanderFeed`.
 33        """
 34        assert issubclass(entity_type, BaseModel)
 35        assert isinstance(feed, ColanderFeed)
 36
 37        self.feed = feed
 38        self.entity_type = entity_type
 39        self.entities: List[entity_type] = []
 40        self.fields: Set[str] = {"super_type"}
 41        self.feed.resolve_references()
 42        self._filter_entities()
 43        self._compute_field_list()

 44
 45    def _filter_entities(self):
 46        """
 47        Filter entities from the feed to include only those matching the specified entity type.
 48
 49        Populates the self.entities list with matching entities.
 50        """
 51        for _, entity in self.feed.entities.items():
 52            if isinstance(entity, self.entity_type):
 53                self.entities.append(entity)
 54
 55    def _compute_field_list(self, exclude_none=True):
 56        """
 57        Compute the list of fields to include in the CSV export.
 58
 59        This method performs a two-pass filtering process to determine which fields
 60        should be included in the CSV output:
 61
 62        1. First pass: Identifies candidate fields by excluding internal fields and object references
 63        2. Second pass: Optionally excludes fields that are None for all entities
 64
 65        Args:
 66            exclude_none: Whether to exclude fields that are None for all entities. Defaults to True.
 67
 68        Returns:
 69            None: Updates :py:obj:`self.fields` in-place with the computed field list
 70
 71        Side effects:
 72            - Modifies self.fields by adding qualifying field names
 73            - self.fields is sorted alphabetically after computation
 74            - 'super_type' is always included regardless of other filtering criteria
 75
 76        Note:
 77            This method assumes self.entities has already been populated with filtered entities
 78            and self.entity_type contains the Pydantic model definition with field information.
 79        """
 80        candidate_fields = set()
 81
 82        # First pass: collect all potential fields
 83        for field, info in self.entity_type.model_fields.items():
 84            if field in self.excluded_fields:
 85                continue
 86            annotation_args = get_args(info.annotation)
 87            if ObjectReference in annotation_args or List[ObjectReference] in annotation_args:
 88                continue
 89            candidate_fields.add(field)
 90
 91        # Second pass: exclude fields that are None for all entities
 92        if exclude_none:
 93            for field in candidate_fields:
 94                has_non_none_value = False
 95                for entity in self.entities:
 96                    if hasattr(entity, field) and getattr(entity, field) is not None:
 97                        has_non_none_value = True
 98                        break  # Exit early if we find at least one non-None value
 99
100                if has_non_none_value:
101                    self.fields.add(field)
102
103        self.fields.add("super_type")
104        self.fields = sorted(self.fields)
105

[docs]
106    def export(self, output: TextIO, **csv_options):
107        """
108        Export the filtered entities to a CSV file. The CSV includes a header row and one row per entity with the
109        computed field values.
110
111        Args:
112            output: A file-like object to write the CSV to
113            csv_options: Optional keyword arguments passed to :py:class:`csv.DictWriter`. Common options include:
114
115                - quoting: csv.QUOTE_ALL, csv.QUOTE_MINIMAL, etc.
116                - delimiter: Field delimiter
117                - quotechar: Character used for quoting
118                - lineterminator: Line terminator
119                - extrasaction: How to handle extra fields
120
121        Raises:
122            AssertionError: If output is not a file-like object
123        """
124        assert output is not None
125
126        # Set default CSV options if not provided
127        csv_defaults = {"quoting": csv.QUOTE_ALL, "delimiter": ",", "quotechar": '"', "lineterminator": "\n"}
128
129        # Merge user options with defaults (user options take precedence)
130        writer_options = {**csv_defaults, **csv_options}
131
132        objects: List[Dict] = []
133        for e in self.entities:
134            obj = e.model_dump(mode="json")
135            obj["type"] = str(e.type)
136            obj["super_type"] = str(e.super_type)
137            objects.append({k: obj[k] for k in sorted(self.fields) if k not in self.excluded_fields})
138
139        writer = csv.DictWriter(output, fieldnames=self.fields, **writer_options)
140        writer.writeheader()
141        writer.writerows(objects)