Source code for colander_data_converter.exporters.csv

import csv
from typing import List, get_args, Dict, Set, TextIO

from pydantic import BaseModel

from colander_data_converter.base.common import ObjectReference
from colander_data_converter.base.models import ColanderFeed
from colander_data_converter.exporters.exporter import BaseExporter


class CsvExporter(BaseExporter):
    """
    A class to export entities from a ColanderFeed to CSV format.

    This exporter filters entities by type and exports their fields to a CSV file,
    excluding certain internal fields and object references.
    """

    excluded_fields: List[str] = ["colander_internal_type", "attributes"]
    """Fields to exclude from CSV export"""

    def __init__(self, feed: ColanderFeed, entity_type: type[BaseModel]):
        """
        Initialize the CSV exporter.

        Args:
            feed (~colander_data_converter.base.models.ColanderFeed): The feed containing entities to export
            entity_type (type[BaseModel]): The Pydantic model type to filter entities by

        Raises:
            AssertionError: If :py:obj:`entity_type` is not a subclass of :py:class:`pydantic.BaseModel` or
                :py:obj:`feed` is not a :py:class:`~colander_data_converter.base.models.ColanderFeed`.
        """
        assert issubclass(entity_type, BaseModel)
        assert isinstance(feed, ColanderFeed)

        self.feed = feed
        self.entity_type = entity_type
        self.entities: List[entity_type] = []
        self.fields: Set[str] = {"super_type"}
        self.feed.resolve_references()
        self._filter_entities()
        self._compute_field_list()

    def _filter_entities(self):
        """
        Filter entities from the feed to include only those matching the specified entity type.

        Populates the self.entities list with matching entities.
        """
        for _, entity in self.feed.entities.items():
            if isinstance(entity, self.entity_type):
                self.entities.append(entity)

    def _compute_field_list(self, exclude_none=True):
        """
        Compute the list of fields to include in the CSV export.

        This method performs a two-pass filtering process to determine which fields
        should be included in the CSV output:

        1. First pass: identifies candidate fields by excluding internal fields and object references
        2. Second pass: optionally excludes fields that are None for all entities

        Args:
            exclude_none (bool, optional): Whether to exclude fields that are None for all entities. Defaults to True.

        Returns:
            None: Updates :py:obj:`self.fields` in-place with the computed field list

        Side effects:
            - Modifies self.fields by adding qualifying field names
            - self.fields is sorted alphabetically after computation
            - 'super_type' is always included regardless of other filtering criteria

        Note:
            This method assumes self.entities has already been populated with filtered entities
            and self.entity_type contains the Pydantic model definition with field information.
        """
        candidate_fields = set()

        # First pass: collect all potential fields, skipping excluded names and object references
        for field, info in self.entity_type.model_fields.items():
            if field in self.excluded_fields:
                continue
            annotation_args = get_args(info.annotation)
            if ObjectReference in annotation_args or List[ObjectReference] in annotation_args:
                continue
            candidate_fields.add(field)

        # Second pass: exclude fields that are None for all entities
        if exclude_none:
            for field in candidate_fields:
                has_non_none_value = False
                for entity in self.entities:
                    if hasattr(entity, field) and getattr(entity, field) is not None:
                        has_non_none_value = True
                        break  # Exit early if we find at least one non-None value

                if has_non_none_value:
                    self.fields.add(field)
        else:
            # Keep every candidate field when None-only columns are not filtered out
            self.fields.update(candidate_fields)

        self.fields.add("super_type")
        self.fields = sorted(self.fields)

    def export(self, output: TextIO, **csv_options):
        """
        Export the filtered entities to a CSV file. The CSV includes a header row and one row per entity with the
        computed field values.

        Args:
            output (TextIO): A file-like object to write the CSV to
            csv_options: Optional keyword arguments passed to :py:class:`csv.DictWriter`. Common options include:

                - quoting: csv.QUOTE_ALL, csv.QUOTE_MINIMAL, etc.
                - delimiter: Field delimiter
                - quotechar: Character used for quoting
                - lineterminator: Line terminator
                - extrasaction: How to handle extra fields

        Raises:
            AssertionError: If output is None
        """
        assert output is not None

        # Set default CSV options if not provided
        csv_defaults = {"quoting": csv.QUOTE_ALL, "delimiter": ",", "quotechar": '"', "lineterminator": "\n"}

        # Merge user options with defaults (user options take precedence)
        writer_options = {**csv_defaults, **csv_options}

        objects: List[Dict] = []
        for e in self.entities:
            obj = e.model_dump(mode="json")
            obj["type"] = str(e.type)
            obj["super_type"] = str(e.super_type)
            objects.append({k: obj[k] for k in sorted(self.fields) if k not in self.excluded_fields})

        writer = csv.DictWriter(output, fieldnames=self.fields, **writer_options)
        writer.writeheader()
        writer.writerows(objects)
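

A minimal usage sketch (not part of the module above). It assumes a ColanderFeed has already been loaded into a `feed` variable and that `Observable` is one of the entity models provided by colander_data_converter.base.models; both the model name and the way the feed is obtained are illustrative assumptions, not guaranteed by this module.

# Usage sketch -- illustrative only.
# `Observable` is an assumed entity model name; `feed` is an already-populated ColanderFeed.
import csv
import io

from colander_data_converter.base.models import ColanderFeed, Observable  # Observable: assumed model
from colander_data_converter.exporters.csv import CsvExporter


def export_observables(feed: ColanderFeed) -> str:
    # Filters the feed down to Observable entities and computes the CSV
    # column list from the model's non-reference, non-excluded fields.
    exporter = CsvExporter(feed, Observable)

    # Write to an in-memory buffer; any csv.DictWriter option can be overridden.
    buffer = io.StringIO()
    exporter.export(buffer, quoting=csv.QUOTE_MINIMAL)
    return buffer.getvalue()

Passing quoting=csv.QUOTE_MINIMAL overrides the QUOTE_ALL default set in export(); the other defaults (comma delimiter, double-quote quotechar, "\n" line terminator) still apply unless overridden the same way.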