1import csv
2from typing import List, get_args, Dict, Set, TextIO
3
4from pydantic import BaseModel
5
6from colander_data_converter.base.common import ObjectReference
7from colander_data_converter.base.models import ColanderFeed
8from colander_data_converter.exporters.exporter import BaseExporter
9
10
[docs]
class CsvExporter(BaseExporter):
    """
    A class to export entities from a ColanderFeed to CSV format.

    This exporter filters entities by type and exports their fields to a CSV file,
    excluding certain internal fields and object references.
    """

    excluded_fields: List[str] = ["colander_internal_type", "attributes"]
    """Fields to exclude from CSV export"""

    def __init__(self, feed: ColanderFeed, entity_type: type[BaseModel]):
        """
        Initialize the CSV exporter.

        References of the feed are resolved, then the entities matching ``entity_type`` are
        collected and the list of exported columns is computed.

        Args:
            feed (~colander_data_converter.base.models.ColanderFeed): The feed containing entities to export
            entity_type (type[BaseModel]): The Pydantic model type to filter entities by

        Raises:
            AssertionError: If :py:obj:`entity_type` is not a subclass of :py:class:`pydantic.BaseModel` or
                :py:obj:`feed` is not a :py:class:`~colander_data_converter.base.models.ColanderFeed`.
        """
        # Assertions are kept on purpose: AssertionError is the documented contract.
        assert issubclass(entity_type, BaseModel)
        assert isinstance(feed, ColanderFeed)

        self.feed = feed
        self.entity_type = entity_type
        self.entities: List[BaseModel] = []
        # Always a sorted list of column names; 'super_type' is always present.
        self.fields: List[str] = ["super_type"]
        self.feed.resolve_references()
        self._filter_entities()
        self._compute_field_list()

    def _filter_entities(self):
        """
        Filter entities from the feed to include only those matching the specified entity type.

        Rebuilds :py:obj:`self.entities` from scratch, so calling this method several times
        never duplicates entities.
        """
        self.entities = [entity for entity in self.feed.entities.values() if isinstance(entity, self.entity_type)]

    def _compute_field_list(self, exclude_none=True):
        """
        Compute the list of fields to include in the CSV export.

        This method performs a two-pass filtering process to determine which fields
        should be included in the CSV output:

        1. First pass: Identifies candidate fields by excluding internal fields and object references
        2. Second pass: Optionally excludes fields that are None for all entities

        Args:
            exclude_none (bool, optional): Whether to exclude fields that are None for all entities.
                When False, every candidate field is kept. Defaults to True.

        Returns:
            None: Replaces :py:obj:`self.fields` with the computed field list

        Side effects:
            - self.fields is recomputed from scratch on every call
            - self.fields is a list sorted alphabetically after computation
            - 'super_type' is always included regardless of other filtering criteria

        Note:
            This method assumes self.entities has already been populated with filtered entities
            and self.entity_type contains the Pydantic model definition with field information.
        """
        candidate_fields = set()

        # First pass: collect all potential fields
        for field, info in self.entity_type.model_fields.items():
            if field in self.excluded_fields:
                continue
            # Unions/containers mentioning ObjectReference point to other entities of the
            # feed and are not exported as CSV columns.
            annotation_args = get_args(info.annotation)
            if ObjectReference in annotation_args or List[ObjectReference] in annotation_args:
                continue
            candidate_fields.add(field)

        # Second pass: exclude fields that are None for all entities
        if exclude_none:
            selected = {
                field
                for field in candidate_fields
                if any(getattr(entity, field, None) is not None for entity in self.entities)
            }
        else:
            # Keep every candidate; previously this branch silently dropped all of them.
            selected = candidate_fields

        selected.add("super_type")
        self.fields = sorted(selected)

    def export(self, output: TextIO, **csv_options):
        """
        Export the filtered entities to a CSV file. The CSV includes a header row and one row per entity with the
        computed field values.

        Args:
            output (TextIO): A file-like object to write the CSV to
            csv_options: Optional keyword arguments passed to :py:class:`csv.DictWriter`. Common options include:

                - quoting: csv.QUOTE_ALL, csv.QUOTE_MINIMAL, etc.
                - delimiter: Field delimiter
                - quotechar: Character used for quoting
                - lineterminator: Line terminator
                - extrasaction: How to handle extra fields

        Raises:
            AssertionError: If output is None
        """
        assert output is not None

        # Set default CSV options if not provided
        csv_defaults = {"quoting": csv.QUOTE_ALL, "delimiter": ",", "quotechar": '"', "lineterminator": "\n"}

        # Merge user options with defaults (user options take precedence)
        writer_options = {**csv_defaults, **csv_options}

        # Column order is the (already sorted) field list computed at initialization.
        fieldnames = list(self.fields)
        columns = [name for name in fieldnames if name not in self.excluded_fields]

        rows: List[Dict] = []
        for entity in self.entities:
            dumped = entity.model_dump(mode="json")
            # Export the string form of the type objects rather than their JSON dump.
            dumped["type"] = str(entity.type)
            dumped["super_type"] = str(entity.super_type)
            # Keys missing from the dump are left out; DictWriter fills them with its restval.
            rows.append({name: dumped[name] for name in columns if name in dumped})

        writer = csv.DictWriter(output, fieldnames=fieldnames, **writer_options)
        writer.writeheader()
        writer.writerows(rows)