Source code for pcapng_utils.tshark.protocols.http2

  1import warnings
  2from functools import cached_property
  3from collections.abc import Set, Sequence, Mapping
  4from typing import ClassVar, Optional, Any
  5
  6from ...payload import Payload
  7from ..types import HarEntry, DictLayers, NameValueDict
  8from ..utils import get_tshark_bytes_from_raw, har_entry_with_common_fields
  9
 10
class Http2Substream:
    """
    Class to represent a HTTP2 substream. It contains the layers of the packet and the metadata of the substream.
    Wrap the raw HTTP2 substream and the frame layers to extract the relevant information.

    Only the packet layers listed in `KEEP_LAYERS` are retained from the full packet.
    """
    # `communityid` is the key under which the community ID is read from the packet layers
    # by the traffic parser; it must be retained for the `community_id` property to work.
    KEEP_LAYERS: ClassVar[Set[str]] = {'frame', 'ip', 'ipv6', 'tcp', 'communityid'}

    def __init__(self, raw_http2_substream: dict[str, Any], all_layers: DictLayers):
        """
        :param raw_http2_substream: the raw `http2.stream` dictionary of this substream
        :param all_layers: all the layers of the packet carrying this substream (only `KEEP_LAYERS` are stored)
        """
        self.packet_layers: dict[str, Any] = {
            layer: data for layer, data in all_layers.items() if layer in self.KEEP_LAYERS
        }
        self.raw_http2_substream = raw_http2_substream

    @property
    def http2_flags(self) -> int:
        # base 0 lets `int` honor the `0x` prefix of the flags string
        return int(self.raw_http2_substream.get('http2.flags', '0x0'), 0)

    @property
    def http2_type(self) -> int:
        # -1 when the substream has no declared type
        return int(self.raw_http2_substream.get('http2.type', -1))

    @property
    def frame_layer(self) -> dict[str, Any]:
        return self.packet_layers['frame']

    @property
    def timestamp(self) -> float:
        return float(self.frame_layer['frame.time_epoch'])

    @property
    def frame_nb(self) -> int:
        # useful for debugging with Wireshark
        return int(self.frame_layer['frame.number'])

    @property
    def community_id(self) -> str:
        # The former lookup used the key `community_id`, which is never stored in `packet_layers`
        return self.packet_layers['communityid']

    @cached_property
    def ip_version_and_layer(self) -> tuple[str, dict[str, Any]]:
        """
        Return the IP layer keyword (`ip` or `ipv6`) together with the matching layer.

        Exactly one of the two layers is expected to be present.
        """
        ipv4 = "ip" in self.packet_layers
        ipv6 = "ipv6" in self.packet_layers
        assert ipv4 ^ ipv6, self
        ip_version_kw = "ipv6" if ipv6 else "ip"
        return ip_version_kw, self.packet_layers[ip_version_kw]

    @property
    def src_host(self) -> str:
        ipv, ip_layer = self.ip_version_and_layer
        return ip_layer[f"{ipv}.src_host"]

    @property
    def dst_host(self) -> str:
        ipv, ip_layer = self.ip_version_and_layer
        return ip_layer[f"{ipv}.dst_host"]

    @property
    def src_ip(self) -> str:
        ipv, ip_layer = self.ip_version_and_layer
        return ip_layer[f"{ipv}.src"]

    @property
    def dst_ip(self) -> str:
        ipv, ip_layer = self.ip_version_and_layer
        return ip_layer[f"{ipv}.dst"]

    @property
    def src_port(self) -> int:
        return int(self.packet_layers['tcp']['tcp.srcport'])

    @property
    def dst_port(self) -> int:
        return int(self.packet_layers['tcp']['tcp.dstport'])

    @property
    def raw_headers(self) -> list[dict[str, Any]]:
        """Return the raw `http2.header` entries of the substream, always as a list (possibly empty)."""
        headers = self.raw_http2_substream.get('http2.header', [])
        if isinstance(headers, dict):
            headers = [headers]  # when only 1 header tshark does not wrap it into a list
        assert isinstance(headers, list), headers
        return headers
93 94
class Http2RequestResponse:
    """
    Common base of a HTTP2 request and a HTTP2 response.

    It holds the substreams of one direction of the exchange, together with the headers and the data
    extracted from them, and exposes the properties shared by requests and responses.
    """
    FALLBACK_CONTENT_TYPE: ClassVar[str] = 'application/octet-stream'

    def __init__(self, substreams: Sequence[Http2Substream]):
        self.substreams = substreams
        extracted = Http2Helper.get_headers_and_data(substreams)
        self.headers, self.data, self.headers_streams, self.data_streams = extracted

    def __bool__(self) -> bool:
        # a message without any substream is considered empty
        return bool(self.substreams)

    @property
    def frames_nbs(self) -> Sequence[int]:
        # distinct frame numbers, in order of first appearance
        return list(dict.fromkeys(ss.frame_nb for ss in self.substreams))

    @property
    def timestamp(self) -> float:
        first = self.substreams[0]
        return first.timestamp

    @property
    def community_id(self) -> str:
        first = self.substreams[0]
        return first.community_id

    @property
    def src_host(self) -> str:
        first = self.substreams[0]
        return first.src_host

    @property
    def dst_host(self) -> str:
        first = self.substreams[0]
        return first.dst_host

    @property
    def src_ip(self) -> str:
        first = self.substreams[0]
        return first.src_ip

    @property
    def dst_ip(self) -> str:
        first = self.substreams[0]
        return first.dst_ip

    @property
    def src_port(self) -> int:
        first = self.substreams[0]
        return first.src_port

    @property
    def dst_port(self) -> int:
        first = self.substreams[0]
        return first.dst_port

    @property
    def http_version(self) -> str:
        return 'HTTP/2'

    @property
    def header_length(self) -> int:
        # The effective payload sent over network has bytes size `http2.length` <= `http2.headers.length`
        # (because special headers - like `:status` - have predefined codes)
        if self:
            return sum(int(ss.raw_http2_substream.get('http2.length', 0)) for ss in self.headers_streams)
        return -1

    @property
    def body_length(self) -> int:
        """
        Sum of the declared `http2.length` of the data substreams, or -1 for an empty message.

        This is the number of compressed bytes (if any compression).

        - `http2.length` is also populated for header substreams (hence only data substreams are summed)
        - we do NOT always have the `http2.body.fragments` -> `http2.body.reassembled.length`

        A warning is emitted when the declared size differs from the size of the extracted data
        although no content encoding is declared.
        """
        if not self:
            return -1
        declared_size = 0
        for ss in self.data_streams:
            declared_size += int(ss.raw_http2_substream.get('http2.length', 0))
        if declared_size != self.data.size:
            # headers are only consulted on mismatch
            if self.headers_map.get('content-encoding', 'identity') == 'identity':
                warnings.warn(
                    f"Content length mismatch despite no compression: "
                    f"declared ({declared_size}) != computed ({self.data.size})"
                    f"\n{self}"
                )
        return declared_size

    @cached_property
    def headers_map(self) -> dict[str, str]:
        # <!> only last header value is taken into account if there are some collisions
        mapping: dict[str, str] = {}
        for h in self.headers:
            mapping[h['name'].lower()] = h['value']
        return mapping

    @property
    def http_status(self) -> int:
        # 0 when there is no `:status` pseudo-header
        status = self.headers_map.get(':status', 0)
        return int(status)

    @property
    def http_method(self) -> str:
        # empty string when there is no `:method` pseudo-header
        return self.headers_map.get(':method', '')

    @property
    def content_type(self) -> str:
        # no content type is reported for an empty message or for a message without data
        if self and self.data:
            return self.headers_map.get('content-type', self.FALLBACK_CONTENT_TYPE)
        return ''

    def get_duration_ms(self) -> float:
        """Return the time elapsed between the first and the last substream in milliseconds (-1 if empty)."""
        if not self:
            return -1
        first, last = self.substreams[0], self.substreams[-1]
        return round(1000 * (last.timestamp - first.timestamp), 2)
203 204
class Http2Request(Http2RequestResponse):
    """
    A HTTP2 request: the headers and the data sent by the requesting side. It can never be empty.
    """
    def __init__(self, substreams: Sequence[Http2Substream]):
        assert substreams, "At least one substream expected for a request"
        super().__init__(substreams)

    @property
    def uri(self) -> str:
        # all the header substreams are expected to agree on a single full URI
        distinct_uris = set()
        for header_stream in self.headers_streams:
            distinct_uris.add(header_stream.raw_http2_substream['http2.request.full_uri'])
        assert len(distinct_uris) == 1, distinct_uris
        return next(iter(distinct_uris))

    def __str__(self):
        frames = ','.join(str(nb) for nb in self.frames_nbs)
        nb_headers, nb_data = len(self.headers_streams), len(self.data_streams)
        return (
            f"Request [#{frames}]: {nb_headers}h + {nb_data}d substreams\n\t"
            f"URI: {self.uri}\n\tHeaders: {self.headers_map}\n\tData: {self.data}"
        )
224 225
class Http2Response(Http2RequestResponse):
    """
    A HTTP2 response: the headers and the data sent back by the responding side.

    <!> May be empty for convenience (response never received)
    """
    def __str__(self):
        frames = ','.join(str(nb) for nb in self.frames_nbs)
        nb_headers, nb_data = len(self.headers_streams), len(self.data_streams)
        return (
            f"Response [#{frames}]: {nb_headers}h + {nb_data}d substreams\n\t"
            f"Headers: {self.headers_map}\n\tData: {self.data}"
        )
237 238
class Http2Stream:
    """
    Class to represent an entire HTTP2 stream (multiple substreams). It contains the request and response objects.
    Http2Stream represents a single HTTP2 stream that can contain multiple substreams as follows:

    .. code-block::

        +-------------------------------------- (tcp stream, http2 stream)
        |   Http2SubStream 1    | Request headers (type: 1)
        |   Http2SubStream ...  | Request data (type: 0, flags: 0x0) - partial data
        |   Http2SubStream 3    | Request data (type: 0, flags: 0x1) - end of stream, contains reassembled data
        |  (Http2SubStream 4    | Request trailers (type: 1))
        +--------------------------------------
        |   Http2SubStream 5    | Response headers (type: 1)
        |   Http2SubStream ...  | Response data (type: 0, flags: 0x0) - partial data
        |   Http2SubStream 7    | Response data (type: 0, flags: 0x1) - end of stream, contains reassembled data
        |  (Http2SubStream 8    | Response trailers (type: 1))
        +--------------------------------------

    Each HTTP2 stream is uniquely identified by a tuple (tcp stream index, http2 stream index)
    and contains both request and response objects.
    """
    def __init__(self, tcp_stream_id: int, http2_stream_id: int, community_id: str):
        """
        Defines a HTTP2 stream for the given TCP stream and HTTP2 stream.

        The request and the response stay unset until :meth:`process` is called.

        :param tcp_stream_id: the ID of the TCP stream
        :param http2_stream_id: the ID of the HTTP2 stream
        :param community_id: the community ID (i.e. TCP|UDP + ips & ports) for this conversation
        """
        self.tcp_stream_id = tcp_stream_id
        self.http2_stream_id = http2_stream_id
        self.community_id = community_id
        self.request: Optional[Http2Request] = None
        self.response: Optional[Http2Response] = None
        self.substreams: list[Http2Substream] = []

    @property
    def id(self) -> tuple[int, int]:
        return self.tcp_stream_id, self.http2_stream_id

    def append(self, raw_http2_substream: dict[str, Any], all_layers: DictLayers) -> None:
        """
        Wrap a raw substream into a `Http2Substream` and add it at the end of the HTTP2 stream.

        :param raw_http2_substream: the raw substream to be added
        :param all_layers: the layers of the packet containing the substream.
            A packet can contain multiple substreams.
        """
        substream = Http2Substream(raw_http2_substream, all_layers)
        self.substreams.append(substream)

    @property
    def waiting_duration(self) -> float:
        # Time between the last request substream and the first response substream, in milliseconds
        # (0 when there is no response)
        if not self.response:
            return 0
        assert self.request, self.id
        last_of_request = self.request.substreams[-1]
        first_of_response = self.response.substreams[0]
        elapsed = first_of_response.timestamp - last_of_request.timestamp
        return round(1000 * elapsed, 2)

    def har_entry(self) -> Optional[dict[str, Any]]:
        """
        Build the HAR entry of the HTTP2 stream from its request and response objects.

        The stream must have been processed beforehand.

        :return: the HAR entry for the HTTP2 stream, or None when the request is empty
        """
        request, response = self.request, self.response
        assert request is not None, self.id
        assert response is not None, self.id
        if not request:
            assert not response, self.id
            return None
        opening_stream = request.headers_streams[0]
        return har_entry_with_common_fields({
            '_timestamp': opening_stream.timestamp,
            'timings': {
                'send': request.get_duration_ms(),
                'wait': self.waiting_duration,
                'receive': response.get_duration_ms(),
            },
            'serverIPAddress': opening_stream.dst_ip,
            '_communityId': self.community_id,
            'request': Http2Helper.to_har(request),
            'response': Http2Helper.to_har(response),
        })

    @staticmethod
    def _get_raw_data_one_substream(raw_http2_substream: Mapping[str, Any]) -> Payload:
        """
        Extract the payload of a single raw data substream.

        Note:
        - when dealing with a reassembled data substream, `http2.data.data_raw` MAY not contain all data
        - if the payload was compressed, tshark decompresses ALL data for us (even if data is reassembled)
        under `Content-encoded entity body ...` -> `http2.data.data_raw` key, so we check it first
        """
        for key, decoded in raw_http2_substream.items():
            if not key.lower().startswith('content-encoded entity body '):
                continue
            assert isinstance(decoded, dict), (key, decoded)
            if 'http2.data.data_raw' in decoded:
                return Payload(get_tshark_bytes_from_raw(decoded['http2.data.data_raw']))
            if 'data_raw' in decoded:  # special case for failed decompression (not observed but as http protocol?!)
                return Payload(get_tshark_bytes_from_raw(decoded['data_raw']))
            # also happens in special case of empty decompressed payload (observed)
            assert decoded['http2.data.data'] == '', decoded
            # the key is absent at this point, so the helper is given None
            return Payload(get_tshark_bytes_from_raw(decoded.get('http2.data.data_raw')))
        if 'http2.body.fragments' in raw_http2_substream:
            fragments = raw_http2_substream['http2.body.fragments']
            return Payload(get_tshark_bytes_from_raw(fragments['http2.body.reassembled.data_raw']))
        return Payload(get_tshark_bytes_from_raw(raw_http2_substream.get('http2.data.data_raw')))

    @classmethod
    def get_raw_data(cls, raw_http2_substreams: Sequence[Mapping[str, Any]]) -> Payload:
        """
        Find the data in the substreams.

        :param raw_http2_substreams: the data substreams to be analyzed
        :return: the payload of the reassembled substream if there is one,
            otherwise the concatenation of the payloads of all the substreams
        """
        # 1) search for the unique substream with reassembled data if present
        substreams_reassembled = {}
        for ix, raw_substream in enumerate(raw_http2_substreams):
            if 'http2.body.fragments' in raw_substream:
                substreams_reassembled[ix] = raw_substream
        if not substreams_reassembled:
            # 2) if there is none (which happens) we manually concatenate fragments
            # <!> decompression for overall content is NOT implemented (should not happen?!)
            return Payload.concat(*[cls._get_raw_data_one_substream(ss) for ss in raw_http2_substreams])
        # should be unique and for last data substream (on rare cases: != at end of stream,
        # which is why the end-of-stream flag is not checked)
        assert len(substreams_reassembled) == 1, substreams_reassembled
        (ix_reassembled, substream_reassembled), = substreams_reassembled.items()
        assert ix_reassembled == len(raw_http2_substreams) - 1, raw_http2_substreams
        return cls._get_raw_data_one_substream(substream_reassembled)

    def process(self) -> None:
        """
        Split the substreams between the request and the response, and create both objects.

        The first substream holding the 'http2.request.full_uri' key in its raw stream is a request substream:
        its source and destination IP addresses identify the two sides of the exchange.
        Every substream sent from that source IP address belongs to the request, and every substream sent from
        that destination IP address belongs to the response. Within each side, the order of the substreams
        is preserved.

        If no response substream is found, the response object is created empty.
        """
        assert self.substreams, self.id

        # Find a request frame and its associated IPs
        request_marker = next(
            (ss for ss in self.substreams if 'http2.request.full_uri' in ss.raw_http2_substream),
            None,
        )
        if request_marker is None:
            src, dst = None, None
        else:
            src, dst = request_marker.src_ip, request_marker.dst_ip
        assert src and dst, self.substreams
        assert src != dst, src

        # Create the request and response objects with their associated substreams
        req_substreams: list[Http2Substream] = []
        resp_substreams: list[Http2Substream] = []
        for ss in self.substreams:
            sender = ss.src_ip
            if sender == src:
                req_substreams.append(ss)
            if sender == dst:
                resp_substreams.append(ss)
        assert len(req_substreams) + len(resp_substreams) == len(self.substreams), self.substreams
        self.request = Http2Request(req_substreams)
        self.response = Http2Response(resp_substreams)  # may be empty

    def __str__(self):
        return '\n'.join((
            f'TCP Stream: {self.tcp_stream_id}, HTTP2 Stream: {self.http2_stream_id}',
            f'{self.request}',
            f'{self.response}',
        ))
406 407
class Http2Helper:
    """Stateless helpers to classify HTTP2 substreams, extract their content and convert messages to HAR."""

    @staticmethod
    def substream_is_header(substream: Http2Substream) -> bool:
        """Tell whether the substream is a header substream (type 1)."""
        return substream.http2_type == 1

    @staticmethod
    def substream_is_data(substream: Http2Substream) -> bool:
        """Tell whether the substream is a data substream (type 0)."""
        return substream.http2_type == 0

    @staticmethod
    def get_headers(substream: Http2Substream) -> list[NameValueDict]:
        """
        Extract the headers from the substream (precondition: it is a header substream).

        Names and values are decoded from their raw bytes and stripped.
        If a header cannot be processed, the exception is re-raised with the offending header attached as a note.

        :param substream: the substream to be analyzed
        :return: the headers of the substream
        """
        extracted: list[NameValueDict] = []
        for header in substream.raw_headers:
            # cope for non-ASCII headers
            try:
                name = get_tshark_bytes_from_raw(header['http2.header.name_raw']).decode()
                value = get_tshark_bytes_from_raw(header.get('http2.header.value_raw')).decode()
                extracted.append({'name': name.strip(), 'value': value.strip()})
            except Exception as e:
                e.add_note(f"{header=}")
                raise
        return extracted

    @staticmethod
    def to_har(message: Http2RequestResponse) -> dict[str, Any]:
        """
        Convert the HTTP2 request or response to its HAR representation.

        <!> Some HTTP2 responses are missing: in that case the message is empty
        and no timestamp nor communication details are reported.

        :param message: the HTTP2 request or response to be converted
        :return: the HAR entry for the HTTP2 request or response
        """
        non_empty = bool(message)
        entry = {
            '_timestamp': message.timestamp if non_empty else None,
            '_rawFramesNumbers': message.frames_nbs,
            'httpVersion': message.http_version,
            'cookies': [],
            'headers': message.headers,
            'headersSize': message.header_length,
            'bodySize': message.body_length,
        }
        if non_empty:
            entry['_communication'] = {
                side: {
                    'ip': getattr(message, f'{side}_ip'),
                    'host': getattr(message, f'{side}_host'),
                    'port': getattr(message, f'{side}_port'),
                }
                for side in ('src', 'dst')
            }
        if isinstance(message, Http2Request):
            entry.update(method=message.http_method, url=message.uri, queryString=[])
            # the body of a request is only reported when there is one
            if message.data.size:
                message.data.update_har_request(entry, message.content_type)
            return entry
        entry.update(status=message.http_status, statusText='', redirectURL='')
        message.data.update_har_response(entry, message.content_type)
        return entry

    @staticmethod
    def get_data(data_substreams: Sequence[Http2Substream]) -> Payload:
        """
        Extract the data from the substreams (precondition: all substreams are data substreams).

        :param data_substreams: the data substreams to be analyzed
        :return: the reassembled data
        """
        raw_substreams = [ss.raw_http2_substream for ss in data_substreams]
        return Http2Stream.get_raw_data(raw_substreams)

    @classmethod
    def get_headers_and_data(cls, substreams: Sequence[Http2Substream]):
        """
        Sort the substreams between headers and data, and extract their content.

        The substreams are identified by their types:
        - Headers substream: type 1
        - Data substream: type 0
        We ignore the rest of the substreams.

        Note that (flag & 0x01) identify the end of stream, usually it happens for a data-stream
        but it may also happen for a header-stream (trailers in gRPC),
        or even never happen.

        :param substreams: the substreams of a HTTP2 stream
        :return: a tuple (headers, data, headers substreams, data substreams),
            regardless if it is a request or a response
        """
        headers: list[NameValueDict] = []
        headers_streams: list[Http2Substream] = []
        data_streams: list[Http2Substream] = []

        for ss in substreams:
            # Parse headers (HTTP2 substream marked as headers)
            if cls.substream_is_header(ss):
                headers_streams.append(ss)
                headers.extend(Http2Helper.get_headers(ss))
            # Register data substreams
            if cls.substream_is_data(ss):
                data_streams.append(ss)

        # as soon as there are substreams, at least one of them must carry headers
        assert headers_streams or not substreams, (len(substreams), data_streams)

        data = Http2Helper.get_data(data_streams)
        return headers, data, headers_streams, data_streams
538 539
class Http2Traffic:
    """
    Class to represent the HTTP2 traffic. It contains the HTTP2 streams and the parsed traffic data.

    In HTTP/2, frames are the smallest unit of communication.
    Each frame has a specific type and can have associated flags.

    **HTTP/2 frame types and flags:**


    HTTP/2 Frame Types:

    - `DATA (0x0)`: carries arbitrary, variable-length sequences of octets associated with a stream.
    - `HEADERS (0x1)`: used to open a stream and carry a header block fragment.
    - `PRIORITY (0x2)`: specifies the sender-advised priority of a stream.
    - `RST_STREAM (0x3)`: abruptly terminates a stream.
    - `SETTINGS (0x4)`: used to communicate configuration parameters.
    - `PUSH_PROMISE (0x5)`: used to notify the peer endpoint in advance of streams the sender intends to initiate.
    - `PING (0x6)`: used to measure round-trip time and ensure the connection is still active.
    - `GOAWAY (0x7)`: informs the peer to stop creating streams on this connection.
    - `WINDOW_UPDATE (0x8)`: used to implement flow control.
    - `CONTINUATION (0x9)`: used to continue a sequence of header block fragments.

    HTTP/2 Frame Flags:

    - `END_STREAM (0x1)`: indicates that the frame is the last one for the current stream.
    - `END_HEADERS (0x4)`: indicates that the frame contains the entire header block.
    - `PADDED (0x8)`: indicates that the frame contains padding.
    - `PRIORITY (0x20)`: indicates that the frame contains priority information.

    **TCP stream ID and the HTTP/2 stream ID**
    The TCP stream ID identifies a unique TCP connection. Each TCP connection is assigned a unique stream ID,
    which is used to track the packets that belong to that connection.
    The HTTP/2 stream ID, within a single TCP connection, multiple HTTP/2 streams can exist. Each HTTP/2 stream is
    identified by a unique stream ID within the context of that TCP connection. These stream IDs are used to
    multiplex multiple HTTP/2 requests and responses over a single TCP connection.

    A single TCP stream (connection) can contain multiple HTTP/2 streams. Each HTTP/2 stream is
    uniquely identified within the context of its TCP stream. The combination of the TCP stream ID and the
    HTTP/2 stream ID uniquely identifies an HTTP/2 stream within the network traffic.
    """
    def __init__(self, traffic: Sequence[DictLayers]):
        """
        Store the traffic and parse it right away.

        :param traffic: the layers of each packet of the capture
        """
        self.traffic = traffic
        self.stream_pairs: dict[tuple[int, int], Http2Stream] = {}
        self.parse_traffic()

    def parse_traffic(self) -> None:
        """
        Parse the traffic and extract the HTTP2 streams. It creates a dictionary for each HTTP2 stream.
        Each key is a tuple with the TCP stream ID and the HTTP2 stream ID.

        Identify each HTTP2 request and its associated HTTP2 response by following these steps:

        1. Iterate through packets: it loops through all packets obtained from the `traffic` object.
        2. Extract protocols: for each packet, it extracts the protocols from the `frame.protocols` field.
        3. Check for HTTP2 protocol: it checks if the packet contains the `http2` protocol.
        4. Extract the TCP stream ID: it retrieves the TCP stream ID from the `tcp.stream` field.
        5. Handle HTTP2 layer: it ensures the `http2` layer is a list of HTTP2 stream objects.
        6. Process each HTTP2 stream: for each HTTP2 stream in the `http2` layer:

           - extract stream information: it retrieves the stream type and stream ID.
           - filter relevant streams: it ignores streams that are not data (type 0) or headers (type 1).
           - relocate a reassembled body found at the top level of the layer into its stream,
             under the `http2.body.fragments` key expected when the data is extracted later on.
           - create or update stream pair: it creates a new tuple of `(tcp_stream_id, http2_stream_id)` if it does not
             exist and appends the substream to the list.
        7. Process streams: after assembling the HTTP2 streams, it processes each stream to create the request and
           response objects.

        <!> The `http2` layer of the input packets is modified in place when a reassembled body is relocated.
        """
        # Assemble the HTTP2 streams
        for layers in self.traffic:
            protocols = layers['frame']['frame.protocols'].split(':')
            # Ignore non-http2 packets
            if 'http2' not in protocols:
                continue
            tcp_stream_id = int(layers['tcp']['tcp.stream'])
            community_id: str = layers['communityid']

            # HTTP2 layer can be a list of streams or a single stream, force a list
            http2_layer: list[dict[str, Any]] = layers['http2']
            if not isinstance(http2_layer, list):
                http2_layer = [http2_layer]

            for http2_layer_stream in http2_layer:
                stream = http2_layer_stream['http2.stream']
                assert isinstance(stream, dict), type(stream)
                http2_frame_type = int(stream.get('http2.type', -1))
                # Ignore streams that are not data or headers
                if http2_frame_type not in {0, 1}:
                    continue
                # <!> Edge-case: reassembled body is at top-level instead of nested in its stream
                if 'http2.body.fragments' in http2_layer_stream:
                    assert 'http2.body.fragments' not in stream, http2_layer_stream
                    # It must keep its original key: this is the one looked up when the data is extracted,
                    # any other key would silently discard the reassembled body
                    stream['http2.body.fragments'] = http2_layer_stream.pop('http2.body.fragments')
                # Create a new tuple of (tcp_stream_id, http2_stream_id) if it does not exist
                http2_stream_id = int(stream['http2.streamid'])
                sid = (tcp_stream_id, http2_stream_id)
                if sid not in self.stream_pairs:
                    self.stream_pairs[sid] = Http2Stream(*sid, community_id=community_id)
                else:
                    # all the packets of a given stream must belong to the same conversation
                    known_community_id = self.stream_pairs[sid].community_id
                    assert community_id == known_community_id, (community_id, known_community_id)
                # Append the substream to the list
                self.stream_pairs[sid].append(stream, layers)

        # Process the streams, once for all
        for http2_stream in self.stream_pairs.values():
            http2_stream.process()

    def get_http2_streams(self) -> list[Http2Stream]:
        """Return the parsed HTTP2 streams, in order of first appearance in the traffic."""
        return list(self.stream_pairs.values())

    def get_har_entries(self) -> list[HarEntry]:
        """
        Convert the HTTP2 traffic to HTTP Archive (HAR) format.

        Streams without a HAR entry are skipped.

        :return: the HTTP2 traffic in HAR format
        """
        return [
            har_entry
            for stream in self.get_http2_streams()
            if (har_entry := stream.har_entry())
        ]