Source code for colander_data_converter.converters.stix2.utils
1"""
2Utility functions for STIX2 to Colander conversion and vice versa.
3"""
4
5import re
6from typing import Dict, Any, Optional
7from uuid import uuid4, UUID
8
9from pydantic import UUID4
10
# Precompile the regexes for performance
STIX2_PATTERN_REGEX = re.compile(r"[^=]+\s*=\s*(?:['\"]([^'\"]+)['\"]|([^\s]+))")

# Matches one quoted string literal (backslash escapes allowed). Used to blank
# out values before looking for AND/OR, so that words inside a value are not
# mistaken for pattern operators.
_STIX2_STRING_LITERAL_REGEX = re.compile(r"'(?:\\.|[^'\\])*'|\"(?:\\.|[^\"\\])*\"")


def extract_stix2_pattern_value(pattern: str) -> Optional[str]:
    """
    Extract the value from a STIX2 pattern.

    Handles various STIX2 pattern formats like:

    - :textmonoborder:`[file:hashes.MD5 = 'd41d8cd98f00b204e9800998ecf8427e']`
    - :textmonoborder:`[domain-name:value = 'example.com']`
    - :textmonoborder:`[ipv4-addr:value = '192.168.1.1']`
    - :textmonoborder:`[url:value = 'https://example.com/malicious']`
    - :textmonoborder:`[process:pid = 1234]`
    - :textmonoborder:`[network-traffic:src_port = 443]`

    The value is the right-hand side of the first ``=`` comparison. Quoted
    values are returned without their quotes; unquoted values (e.g. numbers)
    are returned as the raw text up to the next whitespace.

    Args:
        pattern (str): The STIX2 pattern string to parse.

    Returns:
        Optional[str]: The extracted value, or None if the input is empty or
        not a string, if no value could be extracted, or if the pattern
        contains multiple criteria (``AND`` / ``OR`` outside of a quoted value).
    """
    if not pattern or not isinstance(pattern, str):
        return None

    # Remove outer brackets and whitespace
    pattern = pattern.strip()
    if pattern.startswith("[") and pattern.endswith("]"):
        pattern = pattern[1:-1].strip()

    # Check for multiple criteria (AND, OR operators). Quoted literals are
    # blanked first so that a value such as 'Terms and Conditions' does not
    # look like a compound expression.
    without_literals = _STIX2_STRING_LITERAL_REGEX.sub("''", pattern).upper()
    if " AND " in without_literals or " OR " in without_literals:
        return None

    # Match the pattern using the precompiled regex
    match = STIX2_PATTERN_REGEX.search(pattern)
    if match is None:
        return None

    # Group 1 holds a quoted value, group 2 an unquoted one
    return match.group(1) or match.group(2)
53
54
[docs]
def extract_uuid_from_stix2_id(stix2_id: str) -> UUID:
    """
    Extract a UUID from a STIX2 ID.

    This function parses a STIX2 identifier string to extract the UUID portion.
    STIX2 IDs follow the format :textmonoborder:`{type}--{uuid}`, where the UUID is the part
    after the first double dash delimiter.

    :param stix2_id: The STIX2 ID to extract the UUID from
    :type stix2_id: str
    :return: The extracted UUID, or a new UUID if extraction fails
    :rtype: UUID

    .. important::
        If the input is not a string, has no ``--`` delimiter, or the part
        after the delimiter cannot be parsed as a UUID, a new random UUID is
        generated and returned instead of raising an exception.

    .. note::
        The UUID is built with ``version=4``. The standard library constructor
        does not validate the version; it sets the version and variant bits,
        so a well-formed UUID of another version is coerced to version 4
        rather than rejected.

    Examples:
        >>> # Valid STIX2 ID with UUID
        >>> stix_id = "indicator--44af6c9f-4bbc-4984-a74b-1404d1ac07ea"
        >>> uuid_obj = extract_uuid_from_stix2_id(stix_id)
        >>> str(uuid_obj)
        '44af6c9f-4bbc-4984-a74b-1404d1ac07ea'

        >>> # Invalid STIX2 ID format (no delimiter)
        >>> stix_id = "indicator-invalid-format"
        >>> uuid_obj = extract_uuid_from_stix2_id(stix_id)
        >>> isinstance(uuid_obj, UUID)  # Returns a new random UUID
        True

        >>> # Invalid UUID part
        >>> stix_id = "indicator--not-a-valid-uuid"
        >>> uuid_obj = extract_uuid_from_stix2_id(stix_id)
        >>> isinstance(uuid_obj, UUID)  # Returns a new random UUID
        True
    """
    # Guard on the type so that non-string input falls back to a random UUID
    # instead of raising from the membership test.
    if isinstance(stix2_id, str) and "--" in stix2_id:
        # Everything after the first "--" delimiter
        uuid_part = stix2_id.split("--", 1)[1]
        try:
            # Use the standard library constructor directly; it raises
            # ValueError for a malformed hexadecimal string.
            return UUID(uuid_part, version=4)
        except ValueError:
            # Fall through to the random UUID below
            pass

    return uuid4()
102
103
[docs]
def extract_stix2_pattern_name(stix2_pattern: str) -> Optional[str]:
    """
    Extract the name from a STIX 2 pattern string.

    This function parses STIX2 pattern expressions to extract the field name
    portion before the first ``=`` character. The opening bracket (or
    parenthesis) of the expression is removed, as are the leading characters
    of a two-character comparison operator (``!=``, ``<=``, ``>=``).

    :param stix2_pattern: The STIX 2 pattern string to extract the name from
    :type stix2_pattern: str
    :return: The extracted name, or an empty string if the input is not a
        string or contains no ``=`` character
    :rtype: Optional[str]

    .. note::
        The function handles various STIX2 pattern formats including nested
        hash references like :textmonoborder:`file:hashes.'SHA-256'`. List
        indices inside the object path (e.g. ``protocols[0]``) are preserved.

    Examples:
        >>> pattern = "[ipv4-addr:value = '192.168.1.1']"
        >>> extract_stix2_pattern_name(pattern)
        'ipv4-addr:value'

        >>> pattern = "[file:hashes.'SHA-256' = '123abc']"
        >>> extract_stix2_pattern_name(pattern)
        "file:hashes.'SHA-256'"

        >>> pattern = "[network-traffic:protocols[0] = 'tcp']"
        >>> extract_stix2_pattern_name(pattern)
        'network-traffic:protocols[0]'
    """
    if not isinstance(stix2_pattern, str) or "=" not in stix2_pattern:
        return ""

    # Left-hand side of the first comparison
    left_side = stix2_pattern.partition("=")[0].strip()

    # Only the opening delimiters are dropped; brackets that belong to a list
    # index inside the object path must survive.
    left_side = left_side.lstrip("[(").lstrip()

    # For "!=", "<=" and ">=" the split leaves the operator's first character
    # attached to the name.
    return left_side.rstrip("!<>").rstrip()
140
141
[docs]
def get_nested_value(obj: Dict[str, Any], path: str) -> Any:
    """
    Get a value from a nested dictionary using a dot-separated path.

    This function safely navigates through nested dictionaries using a
    dot-separated path string. It returns the value at the specified path
    or None if any part of the path is missing or invalid.

    :param obj: The dictionary to get the value from
    :type obj: Dict[str, Any]
    :param path: The dot-separated path to the value
    :type path: str
    :return: The value at the specified path, or None if the path is empty,
        is not a string, or cannot be followed through dictionaries only
    :rtype: Any

    .. warning::
        This function returns None for missing paths rather than raising
        exceptions. A stored value of None is therefore indistinguishable
        from a missing path. Check for None return values when path existence
        is critical.

    Examples:
        >>> data = {
        ...     "user": {
        ...         "profile": {
        ...             "name": "John",
        ...             "age": 30
        ...         },
        ...         "settings": {
        ...             "theme": "dark"
        ...         }
        ...     }
        ... }
        >>> get_nested_value(data, "user.profile.name")
        'John'
        >>> get_nested_value(data, "user.settings.theme")
        'dark'
    """
    # A non-string path cannot be split; treat it like any other invalid path
    if not path or not isinstance(path, str):
        return None

    current = obj
    for part in path.split("."):
        # Stop as soon as the walk leaves dictionary territory or a key is absent
        if not isinstance(current, dict) or part not in current:
            return None
        current = current[part]

    return current
191
192
[docs]
def set_nested_value(obj: Dict[str, Any], path: str, value: Any) -> None:
    """
    Set a value in a nested dictionary using a dot-separated path.

    This function creates nested dictionaries as needed to set a value at
    the specified dot-separated path. If an intermediate level is missing,
    or is present with the value None, a new dictionary is created for it.

    :param obj: The dictionary to set the value in
    :type obj: Dict[str, Any]
    :param path: The dot-separated path to the value
    :type path: str
    :param value: The value to set
    :type value: Any

    .. note::
        The function modifies the input dictionary in-place. An empty path is
        ignored. An existing intermediate value other than None is never
        replaced; if it does not support item access with the remaining keys,
        the resulting error propagates to the caller.

    Examples:
        >>> data = {}
        >>> set_nested_value(data, "user.profile.name", "John")
        >>> data
        {'user': {'profile': {'name': 'John'}}}

        >>> # Update existing nested value
        >>> data = {'user': {'settings': {'theme': 'light'}}}
        >>> set_nested_value(data, "user.settings.theme", "dark")
        >>> data
        {'user': {'settings': {'theme': 'dark'}}}

        >>> # Add new nested path to existing structure
        >>> set_nested_value(data, "user.profile.age", 30)
        >>> data
        {'user': {'settings': {'theme': 'dark'}, 'profile': {'age': 30}}}

        >>> # Empty path does nothing
        >>> original = {'a': 1}
        >>> set_nested_value(original, "", "value")
        >>> original
        {'a': 1}
    """
    if not path:
        return

    # Split into the keys leading to the parent and the final key to assign
    *parent_keys, leaf_key = path.split(".")

    current = obj
    for key in parent_keys:
        # A level holding None is treated as absent so the walk can continue
        if key not in current or current[key] is None:
            current[key] = {}
        current = current[key]

    current[leaf_key] = value