Tcl: Split string containing backslash-escapes

From FVue
Jump to: navigation, search

Problem

I have a bash string containing backslash-escaped characters, and neither Tcl list interpretation nor Tcl `split` works:

% set a {cd\ \be}
% lindex $a 0
cde  # WRONG: The \b is interpreted as a backspace
% split $a
cd\\ {\be}  # WRONG: two parts, one expected because of the escaped space

Solution

# Split line into words, disregarding backslash escapes (e.g. \b (backspace),
# \a (bell)), but taking backslashed spaces into account.
# Aimed for simulating bash word splitting.
#
# Behaviour:
#   - Runs of unescaped whitespace (also leading/trailing) separate words and
#     never produce empty words.
#   - A word part ending in an ODD number of backslashes escapes the
#     whitespace that follows it: the next part is glued on with `\ '.
#   - A word part ending in an EVEN number of backslashes ends in escaped
#     backslashes; the whitespace that follows is a normal separator.
#   - Backslash sequences inside words are kept verbatim (not interpreted).
#   - A dangling backslash at the very end of the line is dropped.
#   - Limitation: the glue is always backslash-space, so an escaped tab or
#     newline is returned as an escaped space.
#
# Example usage:
#
#     % set a {f cd\ \be}
#     % split_words_bash $a
#     f {cd\ \be}
#
# @param line  Line to split
# @return list  Words
proc split_words_bash {line} {
    set words {}
    # 1 if the previous part ended with an escaping backslash
    set glue 0
    foreach part [split $line] {
        # Count trailing backslashes; only an odd count leaves one backslash
        # unpaired to escape the separator that `split' consumed.
        set stem [string trimright $part "\\"]
        set trailing [expr {[string length $part] - [string length $stem]}]
        set glue_next [expr {$trailing % 2}]
        if {$glue_next} {
            # Remove the escaping backslash; it is re-inserted when gluing
            set part [string range $part 0 end-1]
        }
        if {$glue} {
            # Join `part' to latest word, separated by backslash-space (\ ).
            # `words' is never empty here: the part that set `glue' was
            # appended or glued in the previous iteration.
            lset words end "[lindex $words end]\\ $part"
        } elseif {$glue_next || $part ne ""} {
            # Start a new word.  An empty `part' only starts a word when it
            # was a lone backslash, i.e. the word begins with an escaped
            # space; empty parts from repeated whitespace are skipped.
            lappend words $part
        }
        set glue $glue_next
    }
    return $words
}; # split_words_bash()

See also

http://wiki.tcl.tk/440
Contains a paragraph "When do I need to care about the string representation of a list?"

Comments

blog comments powered by Disqus