Last active
December 31, 2025 17:27
-
-
Save reubano/5dd2252a3e190b083d30b39ecd7d1eda to your computer and use it in GitHub Desktop.
Collect data into overlapping fixed-length chunks or blocks
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from typing import Iterator, Iterable | |
| from collections import deque | |
| from itertools import islice | |
def sliding_window(
    iterable: Iterable[str],
    size: int,
    overlap: int = 1,
) -> Iterator[str]:
    """Collect text into overlapping fixed-length chunks.

    The input is treated as a single stream of characters: a plain string is
    consumed character by character, and an iterable of strings is consumed
    as if its items had been concatenated.

    Args:
        iterable: Input string or iterable of strings.
        size: Window size, in characters.
        overlap: Number of characters shared by consecutive windows.

    Yields:
        Strings of length ``size``. The final window is shorter when the
        input runs out before a full step of new characters is available;
        it then holds ``overlap`` characters of context plus the remainder.
        Input that is non-empty but no longer than ``overlap`` is yielded as
        one short window rather than being dropped.

    Raises:
        ValueError: If ``overlap`` is negative or not smaller than ``size``.
            Because this is a generator, the check runs when iteration
            starts, not when the function is called.

    Examples:
        >>> list(sliding_window('ABCDEFG', 4, 0))
        ['ABCD', 'EFG']
        >>> list(sliding_window('ABCDEFG', 4, 1))
        ['ABCD', 'DEFG']
        >>> list(sliding_window('ABCDEFG', 4, 2))
        ['ABCD', 'CDEF', 'EFG']
        >>> list(sliding_window('ABCDEFG', 4, 3))
        ['ABCD', 'BCDE', 'CDEF', 'DEFG']
        >>> list(sliding_window('ABC', 4, 3))
        ['ABC']
        >>> list(sliding_window(['ab', 'cd', 'ef'], 2, 1))
        ['ab', 'bc', 'cd', 'de', 'ef']
    """
    if overlap >= size:
        raise ValueError(f"overlap ({overlap}) must be less than window size ({size})")
    if overlap < 0:
        raise ValueError(f"overlap ({overlap}) must be non-negative")

    step = size - overlap

    # Flatten to single characters so that islice counts, len() and
    # deque.extend all work in the same unit even for multi-character items.
    chars = (char for chunk in iterable for char in chunk)

    # Prime the window with the first `overlap` characters. islice simply
    # stops early on short input; it never raises StopIteration itself.
    window = deque(islice(chars, overlap), maxlen=size)
    yielded = False

    while True:
        new_elements = ''.join(islice(chars, step))
        if not new_elements:
            break

        # The last window may not have enough new characters to fill a whole
        # step. Shrinking the deque discards the stale characters so that
        # only `overlap` characters of context precede the new ones.
        maxlen = overlap + len(new_elements)
        if maxlen < size:
            window = deque(window, maxlen=maxlen)

        window.extend(new_elements)
        yielded = True
        yield ''.join(window)

    # The input was fully consumed while priming the window: emit what we
    # have instead of silently dropping it.
    if not yielded and window:
        yield ''.join(window)
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@scelarek