Make webwatcher better, hopefully: capture the text surrounding each regex match so repeated matches are treated separately

This commit is contained in:
2025-10-17 11:47:19 -07:00
parent b39d96f4a0
commit 5e857055e4

View File

@@ -93,15 +93,16 @@ def main():
sys.exit(3) sys.exit(3)
try: try:
matches = re.findall(args.pattern, html, flags) # Capture surrounding context so multiple "Beherit" items are treated separately
# If the regex has groups, re.findall returns tuples; normalize to strings context_window = 120 # number of characters around each match to include
norm = [] unique = set()
for m in matches:
if isinstance(m, tuple): for match in re.finditer(args.pattern, html, flags):
norm.append(" | ".join(m)) start = max(match.start() - context_window, 0)
else: end = min(match.end() + context_window, len(html))
norm.append(m) snippet = html[start:end].replace("\n", " ")
unique = set(norm) unique.add(snippet.strip())
except re.error as e: except re.error as e:
log_line(log_file, f"ERROR regex '{args.pattern}': {e}") log_line(log_file, f"ERROR regex '{args.pattern}': {e}")
sys.exit(4) sys.exit(4)