Skip to content

Instantly share code, notes, and snippets.

@reubano
Last active December 31, 2025 17:27
Show Gist options
  • Select an option

  • Save reubano/5dd2252a3e190b083d30b39ecd7d1eda to your computer and use it in GitHub Desktop.

Select an option

Save reubano/5dd2252a3e190b083d30b39ecd7d1eda to your computer and use it in GitHub Desktop.
Collect data into overlapping fixed-length chunks or blocks
from typing import Iterator, Iterable
from collections import deque
from itertools import islice
def sliding_window(
    iterable: Iterable[str],
    size: int,
    overlap: int = 1,
) -> Iterator[str]:
    """Collect data into overlapping fixed-length chunks or blocks.

    The input is consumed lazily, one element at a time. For a plain string
    each element is a single character, so ``size`` and ``overlap`` count
    characters; for any other iterable of strings they count elements, and
    each window is the concatenation of its elements.

    Args:
        iterable: Input string or iterable of strings.
        size: Window size, in elements.
        overlap: Number of trailing elements of one window that are repeated
            at the start of the next window.

    Yields:
        Strings built from ``size`` consecutive elements. The final window is
        shorter when the input runs out before a full step could be read; a
        non-empty input shorter than a full window yields a single short
        window.

    Raises:
        ValueError: If ``overlap`` is negative or not smaller than ``size``.
            Because this is a generator, the error is raised when iteration
            starts, not when the function is called.

    Examples:
        >>> list(sliding_window('ABCDEFG', 4, 0))
        ['ABCD', 'EFG']
        >>> list(sliding_window('ABCDEFG', 4, 1))
        ['ABCD', 'DEFG']
        >>> list(sliding_window('ABCDEFG', 4, 2))
        ['ABCD', 'CDEF', 'EFG']
        >>> list(sliding_window('ABCDEFG', 4, 3))
        ['ABCD', 'BCDE', 'CDEF', 'DEFG']
        >>> list(sliding_window('AB', 4, 2))
        ['AB']
    """
    if overlap >= size:
        raise ValueError(f"overlap ({overlap}) must be less than window size ({size})")
    elif overlap < 0:
        raise ValueError(f"overlap ({overlap}) must be non-negative")

    # Number of new elements each window contributes beyond the overlap.
    step = size - overlap
    iterator = iter(iterable)

    # Pre-fill with the first `overlap` elements so that the first pass through
    # the loop completes a full window. islice simply stops early on short
    # input; it never raises StopIteration itself.
    window = deque(islice(iterator, overlap), maxlen=size)
    yielded = False

    while True:
        # Keep the batch as a list of elements (not a joined string) so that
        # counting and extending work per element, consistently with the
        # pre-filled overlap above.
        new_elements = list(islice(iterator, step))
        if not new_elements:
            break

        # The last window may not have enough new elements to fill the step
        # size. In that case, drop the stale leading elements of the previous
        # window by shrinking the deque before extending it.
        maxlen = overlap + len(new_elements)
        if maxlen < size:
            window = deque(window, maxlen=maxlen)

        window.extend(new_elements)
        yielded = True
        yield ''.join(window)

    # Input no longer than `overlap` was consumed entirely by the pre-fill;
    # emit it as a single short window rather than silently dropping it.
    if window and not yielded:
        yield ''.join(window)
@reubano
Copy link
Author

reubano commented Dec 30, 2025

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment