diff --git a/plugins/SedRegex/__init__.py b/plugins/SedRegex/__init__.py index c7ec09023..2029403b9 100644 --- a/plugins/SedRegex/__init__.py +++ b/plugins/SedRegex/__init__.py @@ -48,14 +48,14 @@ __url__ = 'https://github.com/progval/Limnoria/tree/master/plugins/SedRegex' +from . import sedregex from . import config from . import plugin -from . import constants from importlib import reload +reload(sedregex) reload(config) reload(plugin) -reload(constants) if world.testing: from . import test diff --git a/plugins/SedRegex/config.py b/plugins/SedRegex/config.py index 0d06e5150..41af07e35 100644 --- a/plugins/SedRegex/config.py +++ b/plugins/SedRegex/config.py @@ -49,6 +49,10 @@ def configure(advanced): conf.registerChannelValue(SedRegex, 'displayErrors', registry.Boolean(True, _("""Should errors be displayed?"""))) +conf.registerChannelValue(SedRegex, 'delimiters', + registry.String('', _("""List of delimiters to match sed expressions on. + Multiple delimiters can be specified as a single string: e.g. "/@". + If empty, defaults to all symbols."""))) conf.registerChannelValue(SedRegex, 'boldReplacementText', registry.Boolean(True, _("""Should the replacement text be bolded?"""))) conf.registerChannelValue(SedRegex, 'enable', diff --git a/plugins/SedRegex/constants.py b/plugins/SedRegex/constants.py deleted file mode 100755 index 598aff09c..000000000 --- a/plugins/SedRegex/constants.py +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env python3 - -import re - -TAG_SEEN = 'SedRegex.seen' -TAG_IS_REGEX = 'SedRegex.isRegex' - -SED_REGEX = re.compile( - # This part matches an optional nick followed by ":" or ",", used to direct replacement - # at a particular user. - r"^(?:(?P.+?)[:,] )?" 
- - # Match and save the delimiter (any one symbol) as a named group - r"s(?P<delim>[^\w\s])" - - # Match the pattern to replace, which can be any string up to the first instance of the delimiter - r"(?P<pattern>(?:(?!(?P=delim)).)*)(?P=delim)" - - # Ditto with the replacement - r"(?P<replacement>(?:(?!(?P=delim)).)*)" - - # Optional final delimiter plus flags at the end - r"(?:(?P=delim)(?P<flags>[a-z]*))?" -) - -if __name__ == '__main__': - print("This is the full regex used by the plugin; paste it into your favourite regex tester " - "for debugging:") - print(SED_REGEX) diff --git a/plugins/SedRegex/plugin.py b/plugins/SedRegex/plugin.py index a34e6a413..2ad831f01 100644 --- a/plugins/SedRegex/plugin.py +++ b/plugins/SedRegex/plugin.py @@ -40,7 +40,6 @@ import supybot.utils as utils import re -import sys try: from supybot.i18n import PluginInternationalization @@ -48,7 +47,10 @@ except ImportError: _ = lambda x: x -from .constants import SED_REGEX, TAG_SEEN, TAG_IS_REGEX +from .sedregex import makeSedRegex + +TAG_SEEN = 'SedRegex.seen' +TAG_IS_REGEX = 'SedRegex.isRegex' # Replace newlines and friends with things like literal "\n" (backslash and "n") axe_spaces = utils.str.MultipleReplacer({'\n': '\\n', '\t': '\\t', '\r': '\\r'}) @@ -56,7 +58,7 @@ class SearchNotFoundError(Exception): pass -class SedRegex(callbacks.PluginRegexp): +class SedRegex(callbacks.Plugin): """ Enable SedRegex on the desired channels: ``config channel #yourchannel plugins.sedregex.enable True`` @@ -88,11 +90,9 @@ class SedRegex(callbacks.PluginRegexp): threaded = True public = True - unaddressedRegexps = ['replacer'] - flags = 0 # Make callback matching case sensitive @staticmethod - def _unpack_sed(expr): + def _unpack_sed(sedRegex, expr): if '\0' in expr: raise ValueError('Expression can\'t contain NUL') @@ -107,7 +107,7 @@ def _unpack_sed(expr): escaped_expr += c - match = SED_REGEX.search(escaped_expr) + match = sedRegex.search(escaped_expr) if not match: return @@ -134,33 +134,32 @@ def _unpack_sed(expr): return
(pattern, replacement, count, raw_flags) - # Tag all messages that SedRegex has seen before. This slightly optimizes the ignoreRegex - # feature as all messages tagged with SedRegex.seen but not SedRegex.isRegex is NOT a regexp. - # If we didn't have this tag, we'd have to run a regexp match on each message in the history - # to check if it's a regexp, as there could've been regexp-like messages sent before - # SedRegex was enabled. - def doNotice(self, irc, msg): - if self.registryValue('enable', msg.channel, irc.network): - msg.tag(TAG_SEEN) - def doPrivmsg(self, irc, msg): - # callbacks.PluginRegexp works by defining doPrivmsg(), we don't want to overwrite - # its behaviour - super().doPrivmsg(irc, msg) - self.doNotice(irc, msg) - - # SedRegex main routine. This is called automatically by callbacks.PluginRegexp on every - # message that matches the SED_REGEX expression defined in constants.py - # The actual regexp is passed into PluginRegexp by setting __doc__ equal to the regexp. - def replacer(self, irc, msg, regex): if not self.registryValue('enable', msg.channel, irc.network): return - self.log.debug("SedRegex: running on %s/%s for %s", irc.network, msg.channel, regex) + + # Tag all messages that SedRegex has seen before. This slightly optimizes the ignoreRegex + # feature as all messages tagged with SedRegex.seen but not SedRegex.isRegex is NOT a regexp. + # If we didn't have this tag, we'd have to run a regexp match on each message in the history + # to check if it's a regexp, as there could've been regexp-like messages sent before + # SedRegex was enabled. 
+ msg.tag(TAG_SEEN) + + delimiters = self.registryValue('delimiters', msg.channel, irc.network) + if delimiters: + delimiters = re.escape(delimiters) + sedRegex = makeSedRegex(delimiters) + text = msg.args[1] + regexMatch = sedRegex.match(text) + if not regexMatch: + return + + self.log.debug("SedRegex: running on %s/%s for %s", irc.network, msg.channel, regexMatch) iterable = reversed(irc.state.history) msg.tag(TAG_IS_REGEX) try: - (pattern, replacement, count, flags) = self._unpack_sed(msg.args[1]) + (pattern, replacement, count, flags) = self._unpack_sed(sedRegex, text) except Exception as e: self.log.warning(_("SedRegex parser error: %s"), e, exc_info=True) if self.registryValue('displayErrors', msg.channel, irc.network): @@ -171,14 +170,14 @@ def replacer(self, irc, msg, regex): if 's' in flags: # Special 's' flag lets the bot only look at self messages target = msg.nick else: - target = regex.group('nick') + target = regexMatch.group('nick') if not ircutils.isNick(str(target)): return regex_timeout = self.registryValue('processTimeout') try: message = process(self._replacer_process, irc, msg, - target, pattern, replacement, count, iterable, + target, pattern, replacement, count, iterable, sedRegex, timeout=regex_timeout, pn=self.name(), cn='replacer') except ProcessTimeoutError: irc.error(_("Search timed out.")) @@ -192,9 +191,8 @@ def replacer(self, irc, msg, regex): e.__class__.__name__, e)) else: irc.reply(message, prefixNick=False) - replacer.__doc__ = SED_REGEX.pattern - def _replacer_process(self, irc, msg, target, pattern, replacement, count, messages): + def _replacer_process(self, irc, msg, target, pattern, replacement, count, messages, sedRegex): for m in messages: if m.command in ('PRIVMSG', 'NOTICE') and \ ircutils.strEqual(m.args[0], msg.args[0]) and m.tagged('receivedBy') == irc: @@ -216,7 +214,7 @@ def _replacer_process(self, irc, msg, target, pattern, replacement, count, messa # so we only need to do this check once per message. 
if not m.tagged(TAG_SEEN): m.tag(TAG_SEEN) - if SED_REGEX.match(m.args[1]): + if sedRegex.match(m.args[1]): m.tag(TAG_IS_REGEX) # Ignore messages containing a regexp if ignoreRegex is on. if self.registryValue('ignoreRegex', msg.channel, irc.network) and m.tagged(TAG_IS_REGEX): @@ -251,6 +249,7 @@ def _replacer_process(self, irc, msg, target, pattern, replacement, count, messa self.log.debug(_("SedRegex: Search %r not found in the last %i messages of %s."), msg.args[1], len(irc.state.history), msg.args[0]) raise SearchNotFoundError() + doNotice = doPrivmsg Class = SedRegex diff --git a/plugins/SedRegex/sedregex.py b/plugins/SedRegex/sedregex.py new file mode 100755 index 000000000..79fc79ba3 --- /dev/null +++ b/plugins/SedRegex/sedregex.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python3 + +import functools +import re + +@functools.lru_cache +def makeSedRegex(delimiters=''): + delimiters = delimiters or r"^\w\s" + + return re.compile( + # This part matches an optional nick followed by ":" or ",", used to direct replacement + # at a particular user. + r"^(?:(?P<nick>.+?)[:,] )?" + + # Match and save the delimiter (any one symbol) as a named group + fr"s(?P<delim>[{delimiters}])" + + # Match the pattern to replace, which can be any string up to the first instance of the + # delimiter + r"(?P<pattern>(?:(?!(?P=delim)).)*)(?P=delim)" + + # Ditto with the replacement + r"(?P<replacement>(?:(?!(?P=delim)).)*)" + + # Optional final delimiter plus flags at the end + r"(?:(?P=delim)(?P<flags>[a-z]*))?"
+ ) + +if __name__ == '__main__': + print("This is the full regex used by the plugin; paste it into your favourite regex tester " + "for debugging:") + print(makeSedRegex()) diff --git a/plugins/SedRegex/test.py b/plugins/SedRegex/test.py index df8802296..d30c225cc 100644 --- a/plugins/SedRegex/test.py +++ b/plugins/SedRegex/test.py @@ -151,6 +151,33 @@ def testNonSlashSeparator(self): m = self.getMsg(' ') self.assertIn('4 * 2 = 8', str(m)) + def testCustomDelimiters(self): + with conf.supybot.plugins.sedregex.delimiters.context("@."): + self.feedMsg('test') + self.feedMsg('s@t@b') + m = self.getMsg(' ') + self.assertIn('best', str(m)) + + self.feedMsg('s.t.w') + m = self.getMsg(' ') + self.assertIn('west', str(m)) + + # / is not in the delimiters list, so it is ignored + self.getMsg('s/t/r') + for msg in self.irc.state.history: + self.assertNotIn("rest", str(msg)) + + # These would fail if the delimiters set isn't escaped correctly + with conf.supybot.plugins.sedregex.delimiters.context("]["): + self.feedMsg('test') + self.feedMsg('s]t]f') + m = self.getMsg(' ') + self.assertIn('fest', str(m)) + + self.feedMsg('s[t[qu[') + m = self.getMsg(' ') + self.assertIn('quest', str(m)) + def testWeirdSeparatorsFail(self): self.feedMsg("can't touch this", frm=self.__class__.other) # Only symbols are allowed as separators