inilike.common source code

1 /**
2  * Common functions for dealing with entries in ini-like file.
3  * Authors:
4  *  $(LINK2 https://github.com/FreeSlave, Roman Chistokhodov)
5  * Copyright:
6  *  Roman Chistokhodov, 2015-2016
7  * License:
8  *  $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
9  * See_Also:
10  *  $(LINK2 http://standards.freedesktop.org/desktop-entry-spec/latest/index.html, Desktop Entry Specification)
11  */
12 
13 module inilike.common;
14 
15 package {
16     import std.algorithm;
17     import std.range;
18     import std.string;
19     import std.traits;
20     import std.typecons;
21     import std.conv : to;
22 
23     static if( __VERSION__ < 2066 ) enum nogc = 1;
24 
25     auto keyValueTuple(String)(String key, String value)
26     {
27         alias KeyValueTuple = Tuple!(String, "key", String, "value");
28         return KeyValueTuple(key, value);
29     }
30 }
31 
// Returns s without its leading spaces and tabs. Other whitespace characters are kept.
private @nogc @safe auto simpleStripLeft(inout(char)[] s) pure nothrow
{
    size_t start = 0;
    for (; start < s.length; ++start) {
        if (s[start] != ' ' && s[start] != '\t') {
            break;
        }
    }
    return s[start..$];
}
45 
// Returns s without its trailing spaces and tabs. Other whitespace characters are kept.
private @nogc @safe auto simpleStripRight(inout(char)[] s) pure nothrow
{
    size_t end = s.length;
    while (end > 0 && (s[end-1] == ' ' || s[end-1] == '\t')) {
        --end;
    }
    return s[0..end];
}
60 
61 
/**
 * Test whether the string s represents a comment.
 * A comment is a line whose first character after optional leading spaces and tabs is '#'.
 */
@nogc @safe bool isComment(const(char)[] s) pure nothrow
{
    const stripped = s.simpleStripLeft;
    if (stripped.length == 0) {
        return false;
    }
    return stripped[0] == '#';
}

///
unittest
{
    assert( isComment("# Comment"));
    assert( isComment("   # Comment"));
    assert(!isComment("Not comment"));
    assert(!isComment(""));
}
79 
/**
 * Test whether the string s represents a group header.
 * Trailing spaces and tabs are ignored; leading ones are not.
 * Note: "[]" is not considered as valid group header.
 */
@nogc @safe bool isGroupHeader(const(char)[] s) pure nothrow
{
    const header = s.simpleStripRight;
    // At least one character is required between the brackets.
    if (header.length <= 2) {
        return false;
    }
    return header[0] == '[' && header[$-1] == ']';
}

///
unittest
{
    assert( isGroupHeader("[Group]"));
    assert( isGroupHeader("[Group]    "));
    assert(!isGroupHeader("[]"));
    assert(!isGroupHeader("[Group"));
    assert(!isGroupHeader("Group]"));
}
99 
/**
 * Retrieve group name from header entry.
 * Trailing spaces and tabs after the closing bracket are ignored.
 * Returns: group name (the text between the brackets) or empty string if the entry is not group header.
 */
@nogc @safe auto parseGroupHeader(inout(char)[] s) pure nothrow
{
    auto header = s.simpleStripRight;
    return isGroupHeader(header) ? header[1..$-1] : null;
}

///
unittest
{
    assert(parseGroupHeader("[Group name]") == "Group name");
    assert(parseGroupHeader("NotGroupName") == string.init);

    assert(parseGroupHeader("[Group name]".dup) == "Group name".dup);
}
123 
/**
 * Parse entry of kind Key=Value into pair of Key and Value.
 * The entry is split at the first '=' character.
 * Returns: tuple of key and value strings or tuple of empty strings if it is not a key-value entry
 *  (i.e. there is no '=' or nothing precedes it).
 * Note: this function does not check whether parsed key is valid key.
 */
@nogc @trusted auto parseKeyValue(String)(String s) pure nothrow if (isSomeString!String && is(ElementEncodingType!String : char))
{
    auto parts = s.findSplit("=");
    // Both a non-empty key and the '=' separator itself must be present.
    const bool isPair = parts[0].length != 0 && parts[1].length != 0;
    if (!isPair) {
        return keyValueTuple(String.init, String.init);
    }
    return keyValueTuple(parts[0], parts[2]);
}

///
unittest
{
    assert(parseKeyValue("Key=Value") == tuple("Key", "Value"));
    assert(parseKeyValue("Key=") == tuple("Key", string.init));
    assert(parseKeyValue("=Value") == tuple(string.init, string.init));
    assert(parseKeyValue("NotKeyValue") == tuple(string.init, string.init));

    assert(parseKeyValue("Key=Value".dup) == tuple("Key".dup, "Value".dup));
}
151 
// Returns true if the character c occurs anywhere in str.
private @nogc @safe bool simpleCanFind(in char[] str, char c) pure nothrow
{
    foreach (const char current; str) {
        if (current == c) {
            return true;
        }
    }
    return false;
}
161 
/**
 * Test whether the string is valid key, i.e. does not need escaping, is not a comment and not empty string.
 * A key consisting only of spaces and tabs, or containing '=', is rejected as well.
 */
@nogc @safe bool isValidKey(in char[] key) pure nothrow
{
    // Covers both the empty key and the key made of spaces/tabs only.
    if (key.simpleStripLeft.simpleStripRight.empty) {
        return false;
    }
    const bool rejected = key.isComment || key.simpleCanFind('=') || key.needEscaping();
    return !rejected;
}

///
unittest
{
    assert(isValidKey("Valid key"));
    assert(!isValidKey(""));
    assert(!isValidKey("    "));
    assert(!isValidKey("Sneaky\nKey"));
    assert(!isValidKey("# Sneaky key"));
    assert(!isValidKey("Sneaky=key"));
}
186 
/**
 * Test whether the string is valid key in terms of Desktop File Specification.
 *
 * Not actually used in $(D inilike.file.IniLikeFile), but can be used in derivatives.
 * Only the characters A-Za-z0-9- may be used in key names.
 * Note: this function automatically separates key from locale. Locale is validated against isValidKey.
 *  A key with an empty locale in brackets (e.g. "Name[]") is not valid.
 * Returns: true if desktopKey is a non-empty key name made of allowed characters,
 *  optionally followed by a valid non-empty locale in square brackets.
 * See_Also: $(LINK2 http://standards.freedesktop.org/desktop-entry-spec/latest/ar01s02.html, Basic format of the file), $(D isValidKey)
 */
@nogc @safe bool isValidDesktopFileKey(in char[] desktopKey) pure nothrow {
    auto t = separateFromLocale(desktopKey);
    auto key = t[0];
    auto locale = t[1];

    if (locale.length) {
        if (!isValidKey(locale)) {
            return false;
        }
    } else if (key.length != desktopKey.length) {
        // separateFromLocale stripped a bracket pair that enclosed nothing ("Name[]").
        // An empty locale is indistinguishable from "no locale" in the returned tuple,
        // so it is detected by the key part being shorter than the input.
        return false;
    }

    @nogc @safe static bool isValidDesktopFileKeyChar(char c) pure nothrow {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '-';
    }

    if (key.length == 0) {
        return false;
    }
    foreach (const char c; key) {
        if (!isValidDesktopFileKeyChar(c)) {
            return false;
        }
    }
    return true;
}

///
unittest
{
    assert(isValidDesktopFileKey("Generic-Name"));
    assert(isValidDesktopFileKey("Generic-Name[ru_RU]"));
    assert(!isValidDesktopFileKey("Name$"));
    assert(!isValidDesktopFileKey(""));
    assert(!isValidDesktopFileKey("[ru_RU]"));
    assert(!isValidDesktopFileKey("Name[ru\nRU]"));
    assert(!isValidDesktopFileKey("Name[]"));
}
229 
/**
 * Test whether the entry value represents true, i.e. is exactly "true" or "1".
 * See_Also: $(D isFalse), $(D isBoolean)
 */
@nogc @safe bool isTrue(const(char)[] value) pure nothrow {
    if (value == "true") {
        return true;
    }
    return value == "1";
}

///
unittest
{
    assert(isTrue("true"));
    assert(isTrue("1"));
    assert(!isTrue("not boolean"));
}
245 
/**
 * Test whether the entry value represents false, i.e. is exactly "false" or "0".
 * See_Also: $(D isTrue), $(D isBoolean)
 */
@nogc @safe bool isFalse(const(char)[] value) pure nothrow {
    if (value == "false") {
        return true;
    }
    return value == "0";
}

///
unittest
{
    assert(isFalse("false"));
    assert(isFalse("0"));
    assert(!isFalse("not boolean"));
}
261 
/**
 * Check if the entry value can be interpreted as boolean value,
 * i.e. it is accepted by either $(D isTrue) or $(D isFalse).
 * See_Also: $(D isTrue), $(D isFalse)
 */
@nogc @safe bool isBoolean(const(char)[] value) pure nothrow {
    if (isTrue(value)) {
        return true;
    }
    return isFalse(value);
}

///
unittest
{
    assert(isBoolean("true"));
    assert(isBoolean("1"));
    assert(isBoolean("false"));
    assert(isBoolean("0"));
    assert(!isBoolean("not boolean"));
}
279 
/**
 * Convert bool to string. Can be used to set boolean values.
 * Returns: "true" if b is true, "false" otherwise.
 * See_Also: $(D isBoolean)
 */
@nogc @safe string boolToString(bool b) nothrow pure {
    if (b) {
        return "true";
    }
    return "false";
}

///
unittest
{
    assert(boolToString(false) == "false");
    assert(boolToString(true) == "true");
}
294 
/**
 * Make locale name based on language, country, encoding and modifier.
 * Empty country, encoding or modifier is left out together with its separator.
 * The arguments are not validated.
 * Returns: locale name in form lang_COUNTRY.ENCODING@MODIFIER
 * See_Also: $(D parseLocaleName)
 */
@safe String makeLocaleName(String)(
    String lang, String country = null,
    String encoding = null,
    String modifier = null) pure
if (isSomeString!String && is(ElementEncodingType!String : char))
{
    String countryPart  = country.length  ? "_".to!String ~ country  : String.init;
    String encodingPart = encoding.length ? ".".to!String ~ encoding : String.init;
    String modifierPart = modifier.length ? "@".to!String ~ modifier : String.init;

    return lang ~ countryPart ~ encodingPart ~ modifierPart;
}

///
unittest
{
    assert(makeLocaleName("ru", "RU") == "ru_RU");
    assert(makeLocaleName("ru", "RU", "UTF-8") == "ru_RU.UTF-8");
    assert(makeLocaleName("ru", "RU", "UTF-8", "mod") == "ru_RU.UTF-8@mod");
    assert(makeLocaleName("ru", string.init, string.init, "mod") == "ru@mod");

    assert(makeLocaleName("ru".dup, (char[]).init, (char[]).init, "mod".dup) == "ru@mod".dup);
}
321 
/**
 * Parse locale name into the tuple of 4 values corresponding to language, country, encoding and modifier.
 * Parts that are absent from the locale name are returned as empty strings.
 * Returns: Tuple!(String, "lang", String, "country", String, "encoding", String, "modifier")
 * See_Also: $(D makeLocaleName)
 */
@nogc @trusted auto parseLocaleName(String)(String locale) pure nothrow if (isSomeString!String && is(ElementEncodingType!String : char))
{
    alias LocaleTuple = Tuple!(String, "lang", String, "country", String, "encoding", String, "modifier");

    // Peel the optional parts off from right to left: @MODIFIER, then .ENCODING, then _COUNTRY.
    auto byModifier = findSplit(locale, "@");
    auto byEncoding = findSplit(byModifier[0], ".");
    auto byCountry = findSplit(byEncoding[0], "_");

    return LocaleTuple(byCountry[0], byCountry[2], byEncoding[2], byModifier[2]);
}

///
unittest
{
    assert(parseLocaleName("ru_RU.UTF-8@mod") == tuple("ru", "RU", "UTF-8", "mod"));
    assert(parseLocaleName("ru@mod") == tuple("ru", string.init, string.init, "mod"));
    assert(parseLocaleName("ru_RU") == tuple("ru", "RU", string.init, string.init));

    assert(parseLocaleName("ru_RU.UTF-8@mod".dup) == tuple("ru".dup, "RU".dup, "UTF-8".dup, "mod".dup));
}
354 
/**
 * Drop encoding part from locale (it's not used in constructing localized keys).
 * Returns: Locale string with encoding part dropped out or original string if encoding was not present.
 */
@safe String dropEncodingPart(String)(String locale) pure nothrow if (isSomeString!String && is(ElementEncodingType!String : char))
{
    auto parts = parseLocaleName(locale);
    if (parts.encoding.empty) {
        // Nothing to drop: hand back the very same string.
        return locale;
    }
    return makeLocaleName(parts.lang, parts.country, String.init, parts.modifier);
}

///
unittest
{
    assert("ru_RU.UTF-8".dropEncodingPart() == "ru_RU");
    string locale = "ru_RU";
    assert(locale.dropEncodingPart() is locale);
}
375 
/**
 * Construct localized key name from key and locale.
 * Returns: localized key in form key[locale] dropping encoding out if present,
 *  or the key itself if locale is empty.
 * See_Also: $(D separateFromLocale)
 */
@safe String localizedKey(String)(String key, String locale) pure nothrow if (isSomeString!String && is(ElementEncodingType!String : char))
{
    if (!locale.empty) {
        return key ~ "[".to!String ~ locale.dropEncodingPart() ~ "]".to!String;
    }
    return key;
}

///
unittest
{
    string key = "Name";
    assert(localizedKey(key, "") == key);
    assert(localizedKey("Name", "ru_RU") == "Name[ru_RU]");
    assert(localizedKey("Name", "ru_RU.UTF-8") == "Name[ru_RU]");
}
397 
/**
 * Construct localized key name from key and the separate parts of a locale name.
 * The locale is assembled via $(D makeLocaleName) without an encoding part.
 * Params:
 *  key = non-localized key name
 *  lang = language part of the locale
 *  country = country part of the locale
 *  modifier = modifier part of the locale
 * Returns: localized key in form key[lang_COUNTRY@MODIFIER], or the key itself if
 *  the resulting locale name is empty (so that "key[]" is never produced).
 * See_Also: $(D separateFromLocale), $(D makeLocaleName)
 */
@safe String localizedKey(String)(String key, String lang, String country, String modifier = null) pure if (isSomeString!String && is(ElementEncodingType!String : char))
{
    auto locale = makeLocaleName(lang, country, String.init, modifier);
    if (locale.length == 0) {
        // Same convention as the overload that takes a ready locale string.
        return key;
    }
    return key ~ "[".to!String ~ locale ~ "]".to!String;
}

///
unittest
{
    assert(localizedKey("Name", "ru", "RU") == "Name[ru_RU]");
    assert(localizedKey("Name".dup, "ru".dup, "RU".dup) == "Name[ru_RU]".dup);
    assert(localizedKey("Name", "ru", "RU", "mod") == "Name[ru_RU@mod]");
    assert(localizedKey("Name", string.init, string.init) == "Name");
}
412 
/**
 * Separate key name into non-localized key and locale name.
 * The key is considered localized if it ends with ']' and contains '['; the split
 * happens at the first '['.
 * If key is not localized returns original key and empty string.
 * Returns: tuple of key and locale name.
 * See_Also: $(D localizedKey)
 */
@nogc @trusted auto separateFromLocale(String)(String key) pure nothrow if (isSomeString!String && is(ElementEncodingType!String : char)) {
    if (!key.endsWith("]")) {
        return tuple(key, typeof(key).init);
    }

    auto parts = key.findSplit("[");
    if (parts[1].length == 0) {
        // Closing bracket without an opening one: not a localized key.
        return tuple(key, typeof(key).init);
    }
    // Cut the trailing ']' off the locale part.
    return tuple(parts[0], parts[2][0..$-1]);
}

///
unittest
{
    assert(separateFromLocale("Name[ru_RU]") == tuple("Name", "ru_RU"));
    assert(separateFromLocale("Name") == tuple("Name", string.init));

    char[] mutableString = "Hello".dup;
    assert(separateFromLocale(mutableString) == tuple(mutableString, typeof(mutableString).init));
}
438 
/**
 * Choose the better localized value matching to locale between two localized values. The "goodness" is determined using algorithm described in $(LINK2 http://standards.freedesktop.org/desktop-entry-spec/latest/ar01s04.html, Localized values for keys).
 *
 * A candidate locale matches the original one only if the languages are equal and
 * the candidate's country and modifier are each either absent or equal to those of
 * the original locale. The encoding part is ignored. Among matching candidates the
 * preference order is lang_COUNTRY@MODIFIER, lang_COUNTRY, lang@MODIFIER, lang.
 * Params:
 *  locale = original locale to match to
 *  firstLocale = first locale
 *  firstValue = first value
 *  secondLocale = second locale
 *  secondValue = second value
 * Returns: Tuple of locale and value of the best alternative among two, or tuple of empty strings if none of alternatives match original locale.
 *  If both alternatives match equally well, the first one is returned.
 * Note: value with empty locale is considered better choice than value with locale that does not match the original one.
 */
@nogc @trusted auto chooseLocalizedValue(String)(
    String locale,
    String firstLocale,  String firstValue,
    String secondLocale, String secondValue) pure nothrow
    if (isSomeString!String && is(ElementEncodingType!String : char))
{
    const lt = parseLocaleName(locale);
    const lt1 = parseLocaleName(firstLocale);
    const lt2 = parseLocaleName(secondLocale);

    // 0 means "does not match"; otherwise the more specific the candidate, the higher the score.
    @nogc @safe static int matchScore(
        in char[] lang, in char[] country, in char[] modifier,
        in char[] candidateLang, in char[] candidateCountry, in char[] candidateModifier) pure nothrow
    {
        if (lang != candidateLang) {
            return 0;
        }
        // A part specified by the candidate must agree with the requested locale,
        // e.g. "ru_UA" or "ru@jargon" must not be picked for "ru_RU".
        if (candidateCountry.length && candidateCountry != country) {
            return 0;
        }
        if (candidateModifier.length && candidateModifier != modifier) {
            return 0;
        }
        return 1 + (candidateCountry.length ? 2 : 0) + (candidateModifier.length ? 1 : 0);
    }

    const int score1 = matchScore(lt.lang, lt.country, lt.modifier, lt1.lang, lt1.country, lt1.modifier);
    const int score2 = matchScore(lt.lang, lt.country, lt.modifier, lt2.lang, lt2.country, lt2.modifier);

    if (score1 == 0 && score2 == 0) {
        // Nothing matches: fall back to a non-empty unlocalized value if there is one.
        if (firstLocale.empty && !firstValue.empty) {
            return tuple(firstLocale, firstValue);
        } else if (secondLocale.empty && !secondValue.empty) {
            return tuple(secondLocale, secondValue);
        } else {
            return tuple(String.init, String.init);
        }
    }

    if (score1 >= score2) {
        return tuple(firstLocale, firstValue);
    } else {
        return tuple(secondLocale, secondValue);
    }
}

///
unittest
{
    string locale = "ru_RU.UTF-8@jargon";
    assert(chooseLocalizedValue(string.init, "ru_RU", "Программист", "ru@jargon", "Кодер") == tuple(string.init, string.init));
    assert(chooseLocalizedValue(locale, "fr_FR", "Programmeur", string.init, "Programmer") == tuple(string.init, "Programmer"));
    assert(chooseLocalizedValue(locale, string.init, "Programmer", "de_DE", "Programmierer") == tuple(string.init, "Programmer"));
    assert(chooseLocalizedValue(locale, "fr_FR", "Programmeur", "de_DE", "Programmierer") == tuple(string.init, string.init));

    assert(chooseLocalizedValue(string.init, string.init, "Value", string.init, string.init) == tuple(string.init, "Value"));
    assert(chooseLocalizedValue(locale, string.init, "Value", string.init, string.init) == tuple(string.init, "Value"));
    assert(chooseLocalizedValue(locale, string.init, string.init, string.init, "Value") == tuple(string.init, "Value"));

    assert(chooseLocalizedValue(locale, "ru_RU", "Программист", "ru@jargon", "Кодер") == tuple("ru_RU", "Программист"));
    assert(chooseLocalizedValue(locale, "ru_RU", "Программист", "ru_RU@jargon", "Кодер") == tuple("ru_RU@jargon", "Кодер"));

    assert(chooseLocalizedValue(locale, "ru", "Разработчик", "ru_RU", "Программист") == tuple("ru_RU", "Программист"));

    // Same language but different country or unrequested modifier is not a match.
    assert(chooseLocalizedValue("ru_RU", "ru_UA", "Програміст", string.init, "Programmer") == tuple(string.init, "Programmer"));
    assert(chooseLocalizedValue("ru_RU", "ru@jargon", "Кодер", string.init, "Programmer") == tuple(string.init, "Programmer"));
    assert(chooseLocalizedValue("ru_RU", "ru_UA", "Програміст", "ru@jargon", "Кодер") == tuple(string.init, string.init));
}
504 
/**
 * Check if value needs to be escaped, i.e. contains a newline or a carriage return.
 * This function is currently tolerant to single slashes and tabs.
 * Returns: true if value needs to be escaped, false otherwise.
 * See_Also: $(D escapeValue)
 */
@nogc @safe bool needEscaping(String)(String value) nothrow pure if (isSomeString!String && is(ElementEncodingType!String : char))
{
    foreach (c; value) {
        if (c == '\n' || c == '\r') {
            return true;
        }
    }
    return false;
}

///
unittest
{
    assert("new\nline".needEscaping);
    assert(!`i have \ slash`.needEscaping);
    assert("i like\rcarriage\rreturns".needEscaping);
    assert(!"just a text".needEscaping);
}
529 
/**
 * Escapes string by replacing special symbols with escaped sequences.
 * These symbols are: '\\' (backslash), '\n' (newline), '\r' (carriage return) and '\t' (tab).
 * Returns: Escaped string.
 * See_Also: $(D unescapeValue)
 */
@trusted String escapeValue(String)(String value) pure if (isSomeString!String && is(ElementEncodingType!String : char)) {
    // Backslashes go first, otherwise the backslashes introduced by the
    // following replacements would be escaped a second time.
    auto escaped = value.replace("\\", `\\`.to!String);
    escaped = escaped.replace("\n", `\n`.to!String);
    escaped = escaped.replace("\r", `\r`.to!String);
    escaped = escaped.replace("\t", `\t`.to!String);
    return escaped;
}

///
unittest
{
    assert("a\\next\nline\top".escapeValue() == `a\\next\nline\top`); // notice how the string on the right is raw.
    assert("a\\next\nline\top".dup.escapeValue() == `a\\next\nline\top`.dup);
}
546 
547 
/**
 * Unescape value. If value does not need unescaping this function returns original value.
 *
 * Every backslash followed by a character listed as the first element of some pair
 * is replaced (together with that character) by the second element of the first such pair.
 * A backslash followed by any other character, or a backslash at the very end, is kept as is.
 * Params:
 *  value = string to unescape
 *  pairs = pairs of escaped characters and their unescaped forms.
 * Returns: Unescaped string, or value itself if it contains no backslash.
 */
@trusted inout(char)[] doUnescape(inout(char)[] value, in Tuple!(char, char)[] pairs) nothrow pure {
    // Skip ahead to the first backslash; if there is none, no allocation is needed at all.
    size_t firstSlash = 0;
    while (firstSlash < value.length && value[firstSlash] != '\\') {
        ++firstSlash;
    }
    if (firstSlash == value.length) {
        return value;
    }

    auto result = appender!(typeof(value))();
    result.put(value[0..firstSlash]);

    size_t pos = firstSlash;
    while (pos < value.length) {
        if (value[pos] == '\\' && pos + 1 < value.length) {
            const char escaped = value[pos + 1];
            bool replaced = false;
            foreach (ref pair; pairs) {
                if (pair[0] == escaped) {
                    result.put(pair[1]);
                    replaced = true;
                    break;
                }
            }
            if (replaced) {
                // Both the backslash and the escaped character are consumed.
                pos += 2;
                continue;
            }
        }
        result.put(value[pos]);
        ++pos;
    }
    return result.data;
}

unittest
{
    enum Tuple!(char, char)[] pairs = [tuple('\\', '\\')];
    static assert(is(typeof(doUnescape("", pairs)) == string));
    static assert(is(typeof(doUnescape("".dup, pairs)) == char[]));
}
590 
591 
/**
 * Unescapes string. You should unescape values returned by library before displaying, unless you want to keep them as is (e.g., to allow user to edit values in escaped form).
 * The recognized escape sequences are `\s` (space), `\n` (newline), `\r` (carriage return), `\t` (tab) and `\\` (backslash).
 * Returns: Unescaped string.
 * See_Also: $(D escapeValue), $(D doUnescape)
 */
@safe inout(char)[] unescapeValue(inout(char)[] value) nothrow pure
{
    // Escaped character (the one following the backslash) -> character it stands for.
    static immutable Tuple!(char, char)[] escapeTable = [
        tuple('s', ' '),
        tuple('n', '\n'),
        tuple('r', '\r'),
        tuple('t', '\t'),
        tuple('\\', '\\')
    ];
    return doUnescape(value, escapeTable);
}

///
unittest
{
    assert(`a\\next\nline\top`.unescapeValue() == "a\\next\nline\top"); // notice how the string on the left is raw.
    assert(`\\next\nline\top`.unescapeValue() == "\\next\nline\top");
    string value = `nounescape`;
    assert(value.unescapeValue() is value); //original is returned.
    assert(`a\\next\nline\top`.dup.unescapeValue() == "a\\next\nline\top".dup);
}
616     assert(`a\\next\nline\top`.dup.unescapeValue() == "a\\next\nline\top".dup);
617 }