# Executable example: find two directly adjacent <c:noun:...> elements
# (separated by a single space) inside a longer tagged string.
import re

# The test string is rather long, so it is assembled from adjacent
# literals; the pieces are joined with single spaces into one line.
tst = (
    "In this <c:noun:ns>Buch</c:noun>, used to "
    "designate <c:noun:np>Dinge der Wirklichkeit</c:noun> "
    "rather than <c:noun:fs>SW</c:noun> "
    "<c:noun:ns>Ent</c:noun>."
)

# Goal: match only the last part of the string:
#   <c:noun:fs>SW</c:noun> <c:noun:ns>Ent</c:noun>
#
# The earlier pattern used (.*?) for every group.  Non-greedy only makes
# a group as short as possible for a given start position; the regex
# engine still prefers the leftmost start.  Beginning at the first
# <c:noun:ns>, the second (.*?) simply kept growing across the other
# tags until "</c:noun> <c:noun:" followed, so the match ran from
# <c:noun:ns>Buch</c:noun> up to the very last </c:noun>.
#
# Negated character classes stop each group at the next tag boundary:
# [^>]* cannot leave the opening tag and [^<]* cannot run into another
# tag, so only two elements that are really adjacent can match.
pat = (
    r"<c:noun:([^>]*)>([^<]*)</c:noun> "
    r"<c:noun:([^>]*)>([^<]*)</c:noun>"
)
rex = re.compile(pat)

# Search the string; mat is None when no adjacent pair exists.
mat = rex.search(tst)

# If found, print the whole match.  The call form of print works on
# both Python 2 and Python 3.
if mat is not None:
    print(mat.group())