Improve webwatcher: record each regex match with its surrounding context so repeated matches are tracked separately
This commit is contained in:
@@ -93,15 +93,16 @@ def main():
|
||||
sys.exit(3)
|
||||
|
||||
try:
|
||||
matches = re.findall(args.pattern, html, flags)
|
||||
# If the regex has groups, re.findall returns tuples; normalize to strings
|
||||
norm = []
|
||||
for m in matches:
|
||||
if isinstance(m, tuple):
|
||||
norm.append(" | ".join(m))
|
||||
else:
|
||||
norm.append(m)
|
||||
unique = set(norm)
|
||||
# Capture surrounding context so multiple "Beherit" items are treated separately
|
||||
context_window = 120 # number of characters around each match to include
|
||||
unique = set()
|
||||
|
||||
for match in re.finditer(args.pattern, html, flags):
|
||||
start = max(match.start() - context_window, 0)
|
||||
end = min(match.end() + context_window, len(html))
|
||||
snippet = html[start:end].replace("\n", " ")
|
||||
unique.add(snippet.strip())
|
||||
|
||||
except re.error as e:
|
||||
log_line(log_file, f"ERROR regex '{args.pattern}': {e}")
|
||||
sys.exit(4)
|
||||
|
||||
Reference in New Issue
Block a user