In [66]:
# Pygments
# Pull Requests!
# https://github.com/thatch/regexlint
In [67]:
# Make local source checkouts of regexlint and Pygments importable, then
# load the two regexlint entry points used by the cells below.
# NOTE(review): both paths are absolute paths on one machine; adjust them
# (or install the packages) before running this notebook elsewhere.
import sys
sys.path.append('/Users/thatch/code/regexlint')
sys.path.append('/Users/thatch/code/pygments/pygments-main') # presumably needed because regexlint parses regexes with Pygments' regex-based lexer -- TODO confirm
import regexlint.parser
import regexlint.checkers
# NOTE(review): time and math are not used by any cell visible in this part
# of the notebook; they may be needed further down.
import time
import math
In [68]:
# Dump the parse tree of a simple pattern. The `*` applies only to the
# literal `c` directly before it; `a` and `b` remain plain literals.
regexlint.parser.parser_main(['abc*'])
<RootNode type=Token.Other.Progression data=''>
  <Node type=Token.Other.Literal data='a'>
  <Node type=Token.Other.Literal data='b'>
  <Repetition type=Token.Other.Repetition.Star data=''>
    <Node type=Token.Other.Literal data='c'>

In [69]:
# Parse a pattern containing a character class and a word-boundary anchor.
# The raw string keeps `\w` and `\b` as regex escapes, so they reach the
# parser as a builtin character class and an anchor respectively.
regexlint.parser.parser_main([r'x[abc\w]\b'])
<RootNode type=Token.Other.Progression data=''>
  <Node type=Token.Other.Literal data='x'>
  <CharClass type=Token.Other.CharClass data=None>
    <Node type=Token.Other.Literal data='a'>
    <Node type=Token.Other.Literal data='b'>
    <Node type=Token.Other.Literal data='c'>
    <Node type=Token.Other.BuiltinCharclass data='\\w'>
  <Node type=Token.Other.Anchor.WordBoundary data='\\b'>

In [70]:
# Run the lint checks on the same pattern. `a`, `b` and `c` are already
# matched by `\w`, so listing them in the class is reported as an overlap
# (check 117).
regexlint.checkers.main([r'x[abc\w]\b'])
[('117', 30, 1, "Overlap in character class: ['a', 'b', 'c']")]

In [71]:
# Each alternative carries a `\b` on one side only (before `a`, after `b`);
# the checker reports this as suspicious use of anchors in alternation
# (check 114).
regexlint.checkers.main([r'(\ba)|(b\b)'])
[('114', 30, 1, 'Suspicious use of anchors in alternation')]

In [72]:
# With the word-boundary anchors placed outside the group, on both sides,
# the checker reports nothing (empty result list).
regexlint.checkers.main([r'\b(a|b)\b'])
[]

In [73]:
# Deliberately NOT a raw string: Python converts '\b' to a backspace
# character (\x08) before regexlint ever sees it, which check 110 flags.
regexlint.checkers.main(['\b'])
[('110', 40, 0, "You probably don't want a backspace. Use another backslash, raw string, or use \\x08 instead)")]

In [74]:
# A code point above U+FFFF followed by `*`. Check 121 warns that wide
# unicode characters cause problems on narrow Python builds; the cells
# below show why.
regexlint.checkers.main([u'\U00041234*'])
[('121', 30, 1, 'Wide unicode causes problems in narrow builds')]

In [75]:
# In this interpreter the single code point U+41234 has length 2, i.e. it
# is stored as two code units rather than one character.
len(u'\U00041234')
Out[75]:
2
In [76]:
# Show the two code units individually: indexing the string yields the
# halves of a surrogate pair (u'\ud8c4' and u'\ude34') rather than the
# original character.
x=u'\U00041234'
print repr(x[0])
print repr(x[1])
u'\ud8c4'
u'\ude34'

In [77]:
# Parse the same pattern: the tree contains the two surrogate halves as
# separate literals, and the `*` attaches only to the second half
# (u'\ude34') instead of the whole character.
regexlint.parser.parser_main([u'\U00041234*'])
<RootNode type=Token.Other.Progression data=''>
  <Node type=Token.Other.Literal data=u'\ud8c4'>
  <Repetition type=Token.Other.Repetition.Star data=''>
    <Node type=Token.Other.Literal data=u'\ude34'>

In [77]: