@lmmx
Last active February 13, 2026 13:27
Token classifier demo
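
A demo of multi-type BIO token classification with HavelockAI/bert-token-classifier. Each script loads the model and its type_to_idx.json label map from the Hugging Face Hub, runs three hedging-heavy paragraphs through it, and prints every token that receives a non-O tag for any type. The two scripts are identical except for the input text: the first uses formal academic prose, the second restates the same argument in a podcast register.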
import json

import torch
from huggingface_hub import hf_hub_download
from transformers import AutoModel, AutoTokenizer


def main():
    model_name = "HavelockAI/bert-token-classifier"

    # Load tokenizer + model
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
    model.eval()

    # Load type map
    type_map_path = hf_hub_download(model_name, "type_to_idx.json")
    with open(type_map_path) as f:
        type_to_idx = json.load(f)
    idx_to_type = {v: k for k, v in type_to_idx.items()}

    # The three academic paragraphs
    text = (
        "If one were to assume that the manuscript under consideration, "
        "which might plausibly be interpreted as reflecting certain late-structuralist "
        "commitments, were situated within a broader interdisciplinary discourse, "
        "then it could be argued that its methodological ambiguities would likely "
        "illuminate how epistemic hedging functions across overlapping analytical frameworks.\n\n"
        "In particular, if the reader were to encounter a claim that, insofar as it "
        "may tentatively be understood as provisionally substantiated, appears to "
        "challenge prevailing orthodoxies, then that claim would seem to exemplify "
        "the manner in which layered qualifications can simultaneously signal caution, "
        "reflexivity, and theoretical openness.\n\n"
        "Thus, if it were the case that the argument, which might reasonably be said "
        "to be only partially determinate, were evaluated according to conventional "
        "standards of evidentiary rigor, one might conclude that its apparent "
        "indeterminacy would, under certain interpretive conditions, constitute not "
        "a weakness but a productive site of conceptual negotiation."
    )

    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        logits = model(**inputs)

    # Use Viterbi decoding if CRF is available, else argmax
    if hasattr(model, "decode"):
        preds = model.decode(inputs["input_ids"], inputs["attention_mask"])
    else:
        preds = logits.argmax(dim=-1)  # (1, seq, num_types)

    tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
    print(f"Logits shape: {tuple(logits.shape)}")
    print()
    for i, token in enumerate(tokens):
        active = [
            f"{idx_to_type[t]}={'OBI'[v]}"
            for t, v in enumerate(preds[0, i].tolist())
            if v > 0
        ]
        if active:
            print(f"{token:15} {', '.join(active)}")

    # Simple runtime checks (not pytest)
    assert logits.shape == (
        1,
        inputs["input_ids"].shape[1],
        len(type_to_idx),
        3,
    ), "Unexpected logits shape"
    assert (preds > 0).any(), "Model predicted all-O for every type"


if __name__ == "__main__":
    main()
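
If you want contiguous spans rather than per-token tags, a small helper can group the BIO labels. A minimal sketch, not part of the model's API: bio_to_spans is a hypothetical name, and it assumes preds is a tensor of shape (1, seq_len, num_types) with 0/1/2 encoding O/B/I, as in the argmax fallback above.

# Hypothetical helper, not part of the model: collapse BIO tags into
# (type_name, start_token, end_token) spans, end index inclusive.
def bio_to_spans(preds_row, idx_to_type):
    spans = []
    seq_len, num_types = preds_row.shape
    for t in range(num_types):
        start = None
        for i, v in enumerate(preds_row[:, t].tolist()):
            if v == 1:  # B: close any open span, then open a new one
                if start is not None:
                    spans.append((idx_to_type[t], start, i - 1))
                start = i
            elif v == 2 and start is None:  # stray I without B: treat as a start
                start = i
            elif v == 0 and start is not None:  # O: close the open span
                spans.append((idx_to_type[t], start, i - 1))
                start = None
        if start is not None:  # span runs to the end of the sequence
            spans.append((idx_to_type[t], start, seq_len - 1))
    return spans


# e.g. spans = bio_to_spans(preds[0], idx_to_type)

The variant below is identical apart from the input text, which restates the same argument in a casual, podcast-style register.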
import json

import torch
from huggingface_hub import hf_hub_download
from transformers import AutoModel, AutoTokenizer


def main():
    model_name = "HavelockAI/bert-token-classifier"

    # Load tokenizer + model
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
    model.eval()

    # Load type map
    type_map_path = hf_hub_download(model_name, "type_to_idx.json")
    with open(type_map_path) as f:
        type_to_idx = json.load(f)
    idx_to_type = {v: k for k, v in type_to_idx.items()}

    # The three podcast-style paragraphs
    text = (
        "So imagine you're reading this manuscript and you're thinking, okay, "
        "this definitely has some late-structuralist vibes going on. If you zoom out "
        "and place it in a bigger interdisciplinary conversation, you start to see "
        "that all those methodological gray areas aren't just accidental. They’re "
        "actually showing you how people hedge their claims differently depending "
        "on which analytical lens they're using.\n\n"
        "Now picture a reader coming across a claim that kind of pushes against "
        "the usual orthodoxy. It’s framed cautiously — lots of 'maybe,' 'arguably,' "
        "and 'to some extent.' That layering of qualifications isn’t just academic "
        "nervousness. It’s doing work. It signals caution, sure, but it also shows "
        "self-awareness and leaves the door open for alternative interpretations.\n\n"
        "And here’s the interesting part: if you judged the whole argument by super "
        "strict, traditional standards of evidence, you might initially think the "
        "fuzziness is a flaw. But under the right interpretive lens, that "
        "indeterminacy becomes productive. It turns into a space where meaning is "
        "negotiated rather than declared — and that’s kind of the point."
    )

    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        logits = model(**inputs)

    # Use Viterbi decoding if CRF is available, else argmax
    if hasattr(model, "decode"):
        preds = model.decode(inputs["input_ids"], inputs["attention_mask"])
    else:
        preds = logits.argmax(dim=-1)  # (1, seq, num_types)

    tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
    print(f"Logits shape: {tuple(logits.shape)}")
    print()
    for i, token in enumerate(tokens):
        active = [
            f"{idx_to_type[t]}={'OBI'[v]}"
            for t, v in enumerate(preds[0, i].tolist())
            if v > 0
        ]
        if active:
            print(f"{token:15} {', '.join(active)}")

    # Simple runtime checks (not pytest)
    assert logits.shape == (
        1,
        inputs["input_ids"].shape[1],
        len(type_to_idx),
        3,
    ), "Unexpected logits shape"
    assert (preds > 0).any(), "Model predicted all-O for every type"


if __name__ == "__main__":
    main()
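
To map tagged tokens back to character offsets in the original text, the encoding can be redone with offset mapping. A minimal sketch appended to either script, reusing its text and preds; return_offsets_mapping requires a fast tokenizer.

enc = tokenizer(
    text,
    return_tensors="pt",
    truncation=True,
    max_length=512,
    return_offsets_mapping=True,
)
for i, (start, end) in enumerate(enc["offset_mapping"][0].tolist()):
    if start == end:
        continue  # special tokens ([CLS], [SEP]) map to empty spans
    labels = preds[0, i]  # per-type BIO labels for this token
    if (labels > 0).any():
        print(f"{text[start:end]!r} @ {start}:{end}")

Both scripts run as plain Python files once torch, transformers, and huggingface_hub are installed; the model weights and type_to_idx.json are downloaded from the Hub on first use.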
lmmx commented Feb 13, 2026

[Screenshots of the demo's token-level output: 2026-02-13 13-25-30 and 2026-02-13 13-23-34]
