Source code for colander_data_converter.converters.stix2.utils

  1"""
  2Utility functions for STIX2 to Colander conversion and vice versa.
  3"""
  4
  5import re
  6from typing import Dict, Any, Optional
  7from uuid import uuid4, UUID
  8
  9from pydantic import UUID4
 10
 # Precompiled once at import time. Captures the right-hand side of the first
# ``=`` comparison: group 1 for a quoted value, group 2 for a bare token.
STIX2_PATTERN_REGEX = re.compile(r"[^=]+\s*=\s*(?:['\"]([^'\"]+)['\"]|([^\s]+))")

# Matches a complete quoted literal so that its content can be ignored while
# looking for boolean operators.
_STIX2_QUOTED_LITERAL_REGEX = re.compile(r"'[^']*'|\"[^\"]*\"")


def extract_stix2_pattern_value(pattern: str) -> Optional[str]:
    """
    Extract the value from a STIX2 pattern.

    Handles various STIX2 pattern formats like:

    - :textmonoborder:`[file:hashes.MD5 = 'd41d8cd98f00b204e9800998ecf8427e']`
    - :textmonoborder:`[domain-name:value = 'example.com']`
    - :textmonoborder:`[ipv4-addr:value = '192.168.1.1']`
    - :textmonoborder:`[url:value = 'https://example.com/malicious']`
    - :textmonoborder:`[process:pid = 1234]`
    - :textmonoborder:`[network-traffic:src_port = 443]`

    The words "and" / "or" appearing *inside* a quoted value (for instance
    :textmonoborder:`[email-message:subject = 'Terms and Conditions']`) are part
    of the value and do not make the pattern a multi-criteria one.

    Args:
        pattern (str): The STIX2 pattern string to parse.

    Returns:
        Optional[str]: The extracted value (always as a string, even for
        numeric literals), or None if the input is empty or not a string, if
        no value could be extracted, or if the pattern contains multiple
        criteria joined with AND / OR.
    """
    if not pattern or not isinstance(pattern, str):
        return None

    # Remove outer brackets and whitespace
    pattern = pattern.strip()
    if pattern.startswith("[") and pattern.endswith("]"):
        pattern = pattern[1:-1].strip()

    # Look for boolean operators only outside quoted literals; otherwise a
    # value such as 'this or that.exe' would be mistaken for two criteria.
    # The comparison stays case-insensitive, as it has always been.
    outside_literals = _STIX2_QUOTED_LITERAL_REGEX.sub("''", pattern).upper()
    if " AND " in outside_literals or " OR " in outside_literals:
        return None

    # Match the pattern using the precompiled regex
    match = STIX2_PATTERN_REGEX.search(pattern)
    if match:
        return match.group(1) or match.group(2)

    return None
53 54
def extract_uuid_from_stix2_id(stix2_id: str) -> UUID:
    """
    Extract a UUID from a STIX2 ID.

    This function parses a STIX2 identifier string to extract the UUID portion.
    STIX2 IDs follow the format :textmonoborder:`{type}--{uuid}`, where the UUID is the part
    after the first double dash delimiter.

    :param stix2_id: The STIX2 ID to extract the UUID from
    :type stix2_id: str
    :return: The extracted UUID, or a new UUID if extraction fails
    :rtype: UUID

    .. important::
        If the input is not a string, has no ``--`` delimiter, or the part
        after the delimiter is not a well-formed UUID, a new random UUID is
        generated and returned instead of raising an exception.

    .. note::
        The UUID is built with ``version=4``, so the version and variant bits
        of the parsed value are set to those of a version-4 UUID. An ID whose
        UUID part uses another version therefore comes back with those bits
        rewritten.

    Examples:
        >>> # Valid STIX2 ID with UUID
        >>> stix_id = "indicator--44af6c9f-4bbc-4984-a74b-1404d1ac07ea"
        >>> uuid_obj = extract_uuid_from_stix2_id(stix_id)
        >>> str(uuid_obj)
        '44af6c9f-4bbc-4984-a74b-1404d1ac07ea'

        >>> # Invalid STIX2 ID format (no delimiter)
        >>> stix_id = "indicator-invalid-format"
        >>> uuid_obj = extract_uuid_from_stix2_id(stix_id)
        >>> isinstance(uuid_obj, UUID)  # Returns a new random UUID
        True

        >>> # Invalid UUID part
        >>> stix_id = "indicator--not-a-valid-uuid"
        >>> uuid_obj = extract_uuid_from_stix2_id(stix_id)
        >>> isinstance(uuid_obj, UUID)  # Returns a new random UUID
        True
    """
    # Anything that cannot carry a "{type}--{uuid}" identifier gets a fresh UUID.
    if not isinstance(stix2_id, str) or "--" not in stix2_id:
        return uuid4()

    # Everything after the first "--" delimiter is the candidate UUID.
    uuid_part = stix2_id.partition("--")[2]
    try:
        # Use the standard-library constructor directly rather than calling a
        # type annotation; version=4 is kept so results are unchanged.
        return UUID(uuid_part, version=4)
    except ValueError:
        # Malformed UUID text: fall back to a new random UUID.
        return uuid4()
102 103
def extract_stix2_pattern_name(stix2_pattern: str) -> Optional[str]:
    """
    Extract the name from a STIX 2 pattern string.

    This function returns the left operand of the first ``=`` found in the
    pattern, i.e. the object path being compared. The opening bracket of the
    observation expression and surrounding whitespace are removed; brackets
    that belong to the path itself (list indices such as
    :textmonoborder:`network-traffic:protocols[0]`) are preserved.

    :param stix2_pattern: The STIX 2 pattern string to extract the name from
    :type stix2_pattern: str
    :return: The extracted name, or an empty string if the input is not a
        string or contains no ``=``
    :rtype: Optional[str]

    .. note::
        The function handles various STIX2 pattern formats including nested
        hash references like :textmonoborder:`file:hashes.'SHA-256'`.

    Examples:
        >>> pattern = "[ipv4-addr:value = '192.168.1.1']"
        >>> extract_stix2_pattern_name(pattern)
        'ipv4-addr:value'

        >>> pattern = "[file:hashes.'SHA-256' = '123abc']"
        >>> extract_stix2_pattern_name(pattern)
        "file:hashes.'SHA-256'"

        >>> pattern = "[network-traffic:protocols[0] = 'tcp']"
        >>> extract_stix2_pattern_name(pattern)
        'network-traffic:protocols[0]'
    """
    if not isinstance(stix2_pattern, str) or "=" not in stix2_pattern:
        return ""

    # Only the text before the first "=" can hold the name.
    left_operand = stix2_pattern.partition("=")[0].strip()

    # Drop the opening bracket of the observation expression only; removing
    # every bracket would corrupt list indices inside the object path.
    return left_operand.lstrip("[").strip()
140 141
def get_nested_value(obj: Dict[str, Any], path: str) -> Any:
    """
    Look up a value inside nested dictionaries by following a dotted path.

    The path is split on ``.`` and each segment is used as a key, descending
    one dictionary level per segment. The lookup stops and yields None as soon
    as the current level is not a dictionary or does not contain the segment.

    :param obj: The dictionary to read from
    :type obj: Dict[str, Any]
    :param path: Dot-separated sequence of keys leading to the value
    :type path: str
    :return: The value found at the end of the path, or None if the path is
        empty or cannot be fully resolved
    :rtype: Any

    .. warning::
        A missing path and a stored value of None are indistinguishable:
        both produce None, and no exception is raised for missing keys.

    Examples:
        >>> data = {
        ...     "user": {
        ...         "profile": {"name": "John", "age": 30},
        ...         "settings": {"theme": "dark"},
        ...     }
        ... }
        >>> get_nested_value(data, "user.profile.name")
        'John'
        >>> get_nested_value(data, "user.settings.theme")
        'dark'
        >>> get_nested_value(data, "user.missing.key") is None
        True
    """
    if not path:
        return None

    node = obj
    for key in path.split("."):
        # Bail out on the first level that cannot be descended into.
        if not isinstance(node, dict) or key not in node:
            return None
        node = node[key]

    return node
191 192
def set_nested_value(obj: Dict[str, Any], path: str, value: Any) -> None:
    """
    Store a value inside nested dictionaries by following a dotted path.

    The path is split on ``.``; every segment but the last names a level to
    descend into, and the last segment is the key that receives ``value``.
    A level that does not exist yet is created as an empty dictionary.

    :param obj: The dictionary to write into
    :type obj: Dict[str, Any]
    :param path: Dot-separated sequence of keys leading to the target
    :type path: str
    :param value: The value to store at the end of the path
    :type value: Any

    .. note::
        ``obj`` is modified in place and nothing is returned. An empty path
        leaves ``obj`` untouched.

    Examples:
        >>> data = {}
        >>> set_nested_value(data, "user.profile.name", "John")
        >>> data
        {'user': {'profile': {'name': 'John'}}}

        >>> # Overwrite a value that already exists
        >>> data = {'user': {'settings': {'theme': 'light'}}}
        >>> set_nested_value(data, "user.settings.theme", "dark")
        >>> data
        {'user': {'settings': {'theme': 'dark'}}}

        >>> # Extend an existing structure with a new branch
        >>> set_nested_value(data, "user.profile.age", 30)
        >>> data
        {'user': {'settings': {'theme': 'dark'}, 'profile': {'age': 30}}}

        >>> # Empty path does nothing
        >>> original = {'a': 1}
        >>> set_nested_value(original, "", "value")
        >>> original
        {'a': 1}
    """
    if not path:
        return

    # Separate the levels to walk through from the key that gets assigned.
    *parent_keys, leaf_key = path.split(".")

    container = obj
    for key in parent_keys:
        if key not in container:
            container[key] = {}
        container = container[key]

    container[leaf_key] = value