diff options
author | Ralph Amissah <ralph@amissah.com> | 2008-02-16 00:34:10 +0000 |
---|---|---|
committer | Ralph Amissah <ralph@amissah.com> | 2008-02-16 00:34:10 +0000 |
commit | e4ca2316db705acbb4608445f994eb0b4c8d208f (patch) | |
tree | 390e4a5f5e251c3fcf22da740fae5fc3cb9f0275 /lib | |
parent | Updated sisu-0.64.4 (diff) | |
parent | markup tag match refinement (diff) |
Merge branch 'upstream' into debian/sid
Diffstat (limited to 'lib')
-rw-r--r-- | lib/sisu/v0/shared_sem.rb | 40 |
1 files changed, 29 insertions, 11 deletions
```diff
diff --git a/lib/sisu/v0/shared_sem.rb b/lib/sisu/v0/shared_sem.rb
index da989999..f6303ee0 100644
--- a/lib/sisu/v0/shared_sem.rb
+++ b/lib/sisu/v0/shared_sem.rb
@@ -65,38 +65,56 @@ module SiSU_sem
       @para=para
     end
     def rgx
+      def exclude
+        /^<:code>/
+      end
       def each_csc
-        /[a-z]+[:;]\{|\}[:;][a-z]+/
+        /\b[a-z]+[:;]\{|\}[:;][a-z]+\b/m
+        #/\b(?:[a-z]+(?:[_.][a-z]+)+|[a-z]+)[:;]\{|\}[:;](?:[a-z]+(?:[_.][a-z]+)+|[a-z]+)\b/m
       end
       def each_c
-        /[a-z]+:\{|\}:[a-z]+/
+        /\b[a-z]+:\{|\}:[a-z]+\b/m
+        #/\b(?:[a-z]+(?:[_.][a-z]+)+|[a-z]+):\{|\}:(?:[a-z]+(?:[_.][a-z]+)+|[a-z]+)\b/m
       end
       def each_sc
-        /[a-z]+;\{|\};[a-z]+/
+        /\b[a-z]+;\{|\};[a-z]+\b/m
+        #/\b(?:[a-z]+(?:[_.][a-z]+)+|[a-z]+);\{|\};(?:[a-z]+(?:[_.][a-z]+)+|[a-z]+)\b/m
       end
       def pair_csc
-        /(([a-z]+)(?::\{(.+?)\}:\2|;\{(.+?)\};\2))/
-        #/(([a-z]+);\{(.+?)\};\2)/
-        #/(([a-z]+)([:;])\{(.+?)\}\3\2)/
+        /(([a-z]+)(?::\{(.+?)\}:\2|;\{(.+?)\};\2))/m
+        #/(([a-z]+(?:[_.][a-z]+)+|[a-z]+)(?::\{(.+?)\}:\2|;\{(.+?)\};\2))/m
       end
       def pair_c
-        /(([a-z]+):\{(.+?)\}:\2)/
+        /(([a-z]+):\{(.+?)\}:\2)/m
+        #/(([a-z]+(?:[_.][a-z]+)+|[a-z]+):\{(.+?)\}:\2)/m
       end
       def pair_sc
-        /(([a-z]+);\{.+?\};\2)/
+        /(([a-z]+);\{.+?\};\2)/m
+        #/(([a-z]+(?:[_.][a-z]+)+|[a-z]+);\{.+?\};\2)/m
       end
       def whole_csc_ae
-        /(([a-z]+)(?::\[(.+?)\]:\2|;\[(.+?)\];\2))/
+        /(([a-z]+)(?::\[(.+?)\]:\2|;\[(.+?)\];\2))/m
+        #/(([a-z]+(?:[_.][a-z]+)+|[a-z]+)(?::\[(.+?)\]:\2|;\[(.+?)\];\2))/m
+      end
+      def each_csc_ae
+        /\b[a-z]+[:;]\[|\][:;][a-z]+\b/m
+        #/\b(?:[a-z]+(?:[_.][a-z]+)+|[a-z]+)[:;]\[|\][:;](?:[a-z]+(?:[_.][a-z]+)+|[a-z]+)\b/m
       end
       self
     end
     def rm
       def sem_marker_parts
-        @para.gsub!(rgx.each_csc,'')
+        unless @para =~ rgx.exclude
+          @para.gsub!(rgx.each_csc,'')
+        end
         @para
       end
       def sem_marker_added_extra_parts
-        @para.gsub!(rgx.whole_csc_ae,'')
+        unless @para =~ rgx.exclude
+          @para.gsub!(rgx.whole_csc_ae,'')
+          if para =~rgx.each_csc_ae; puts "WARNING semantic taggin error: #@para"
+          end
+        end
         @para
       end
       def all
```