View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *     http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.shardingsphere.proxy.backend.postgresql.handler.admin.executor.variable.charset;
19  
import org.apache.shardingsphere.infra.database.core.metadata.database.enums.QuoteCharacter;

import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.charset.UnsupportedCharsetException;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.function.Supplier;
28  
29  /**
30   * Character sets defined in PostgreSQL.
31   * <a href="https://www.postgresql.org/docs/14/multibyte.html">24.3. Character Set Support</a>
32   */
33  public enum PostgreSQLCharacterSets {
34      
35      SQL_ASCII(() -> StandardCharsets.US_ASCII),
36      EUC_JP(() -> Charset.forName("EUC_JP")),
37      EUC_CN(() -> Charset.forName("EUC_CN")),
38      EUC_KR(() -> Charset.forName("EUC_KR")),
39      EUC_TW(() -> Charset.forName("EUC_TW")),
40      EUC_JIS_2004(() -> Charset.forName("EUC_JIS_2004")),
41      UTF8(() -> StandardCharsets.UTF_8, "Unicode", "UTF_8"),
42      MULE_INTERNAL(() -> Charset.forName("MULE_INTERNAL")),
43      LATIN1(() -> StandardCharsets.ISO_8859_1, "ISO88591"),
44      LATIN2(() -> Charset.forName("LATIN2"), "ISO88592"),
45      LATIN3(() -> Charset.forName("LATIN3"), "ISO88593"),
46      LATIN4(() -> Charset.forName("LATIN4"), "ISO88594"),
47      LATIN5(() -> Charset.forName("LATIN5"), "ISO88599"),
48      LATIN6(() -> Charset.forName("ISO-8859-10"), "ISO885910"),
49      LATIN7(() -> Charset.forName("ISO-8859-13"), "ISO885913"),
50      LATIN8(() -> Charset.forName("ISO-8859-14"), "ISO885914"),
51      LATIN9(() -> Charset.forName("LATIN9"), "ISO885915"),
52      LATIN10(() -> Charset.forName("LATIN10"), "ISO885916"),
53      WIN1256(() -> Charset.forName("WINDOWS-1256")),
54      WIN1258(() -> Charset.forName("WINDOWS-1258"), "ABC", "TCVN", "TCVN5712", "VSCII"),
55      WIN866(() -> Charset.forName("WINDOWS-866"), "ALT"),
56      WIN874(() -> Charset.forName("WINDOWS-874")),
57      KOI8R(() -> Charset.forName("KOI8-R")),
58      WIN1251(() -> Charset.forName("WINDOWS-1251"), "WIN"),
59      WIN1252(() -> Charset.forName("WINDOWS-1252")),
60      ISO_8859_5(() -> Charset.forName("ISO-8859-5")),
61      ISO_8859_6(() -> Charset.forName("ISO-8859-6")),
62      ISO_8859_7(() -> Charset.forName("ISO-8859-7")),
63      ISO_8859_8(() -> Charset.forName("ISO-8859-8")),
64      WIN1250(() -> Charset.forName("WINDOWS-1250")),
65      WIN1253(() -> Charset.forName("WINDOWS-1253")),
66      WIN1254(() -> Charset.forName("WINDOWS-1254")),
67      WIN1255(() -> Charset.forName("WINDOWS-1255")),
68      WIN1257(() -> Charset.forName("WINDOWS-1257")),
69      KOI8U(() -> Charset.forName("KOI8-U"), "KOI8"),
70      SJIS(() -> Charset.forName("SJIS"), "Mskanji", "ShiftJIS", "WIN932", "Windows932"),
71      BIG5(() -> Charset.forName("BIG5"), "WIN950", "Windows950"),
72      GBK(() -> Charset.forName("GBK"), "WIN936", "Windows936"),
73      UHC(() -> Charset.forName("UHC"), "WIN949", "Windows949"),
74      GB18030(() -> Charset.forName("GB18030")),
75      JOHAB(() -> Charset.forName("JOHAB")),
76      SHIFT_JIS_2004(() -> Charset.forName("SHIFT_JIS"));
77      
78      private static final Map<String, PostgreSQLCharacterSets> CHARACTER_SETS_MAP;
79      
80      static {
81          Map<String, PostgreSQLCharacterSets> map = new HashMap<>(128, 1F);
82          for (PostgreSQLCharacterSets each : values()) {
83              map.put(each.name(), each);
84              for (String eachAlias : each.aliases) {
85                  map.put(eachAlias.toUpperCase(), each);
86              }
87          }
88          CHARACTER_SETS_MAP = map;
89      }
90      
91      private final Charset charset;
92      
93      private final String[] aliases;
94      
95      PostgreSQLCharacterSets(final Supplier<Charset> charsetSupplier, final String... aliases) {
96          Charset result = null;
97          try {
98              result = charsetSupplier.get();
99          } catch (final UnsupportedCharsetException ignored) {
100         }
101         charset = result;
102         this.aliases = aliases;
103     }
104     
105     /**
106      * Find corresponding {@link Charset} by charset name defined in PostgreSQL.
107      *
108      * @param charsetName charset name defined in PostgreSQL
109      * @return corresponding {@link Charset}
110      */
111     public static Charset findCharacterSet(final String charsetName) {
112         String formattedCharsetName = formatValue(charsetName);
113         PostgreSQLCharacterSets result = CHARACTER_SETS_MAP.get(formattedCharsetName.toUpperCase());
114         return null == result || null == result.charset ? Charset.forName(formattedCharsetName) : result.charset;
115     }
116     
117     private static String formatValue(final String value) {
118         return QuoteCharacter.SINGLE_QUOTE.isWrapped(value) || QuoteCharacter.QUOTE.isWrapped(value) ? value.substring(1, value.length() - 1) : value.trim();
119     }
120 }