// CharsetMapToCharacterEncodingGroupDataCodingProvider.java

package fr.sii.ogham.sms.sender.impl.cloudhopper.preparator;

import static com.cloudhopper.commons.charset.CharsetUtil.NAME_GSM;
import static com.cloudhopper.commons.charset.CharsetUtil.NAME_GSM7;
import static com.cloudhopper.commons.charset.CharsetUtil.NAME_GSM8;
import static com.cloudhopper.commons.charset.CharsetUtil.NAME_ISO_8859_1;
import static com.cloudhopper.commons.charset.CharsetUtil.NAME_PACKED_GSM;
import static com.cloudhopper.commons.charset.CharsetUtil.NAME_UCS_2;
import static com.cloudhopper.commons.gsm.DataCoding.CHAR_ENC_8BIT;
import static com.cloudhopper.commons.gsm.DataCoding.CHAR_ENC_DEFAULT;
import static com.cloudhopper.commons.gsm.DataCoding.CHAR_ENC_LATIN1;
import static com.cloudhopper.commons.gsm.DataCoding.CHAR_ENC_UCS2;

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

import com.cloudhopper.commons.charset.Charset;
import com.cloudhopper.commons.charset.CharsetUtil;
import com.cloudhopper.commons.gsm.DataCoding;

import fr.sii.ogham.sms.encoder.Encoded;
import fr.sii.ogham.sms.sender.impl.cloudhopper.encoder.NamedCharset;
import fr.sii.ogham.sms.sender.impl.cloudhopper.exception.DataCodingException;
import fr.sii.ogham.sms.sender.impl.cloudhopper.exception.UnsupportedCharsetException;

/**
 * Provide a Data Coding Scheme according to the charset used to encode the
 * message. The resulting encoding belongs to the <b>Character Encoding
 * Group</b>:
 * 
 * <p>
 * Bit 7 6 5 4 3 2 1 0<br>
 * <br>
 * Bits 7..4 contain the "Coding Group Bits" which control what values are
 * contained in bits 3..0 OR even 5..0<br>
 * <br>
 * <b>0000: Character Encoding Group</b>
 * <ul>
 * <li>Bits 0,1,2,3 Represent 16 Language Encodings</li>
 * </ul>
 * 
 * <p>
 * The association between a charset name and the alphabet value is held in a
 * map. The default association is given by {@link #defaultMap()}; a custom one
 * may be supplied at construction time.
 * 
 * <p>
 * Instances are immutable: both fields are final and the map is an
 * unmodifiable copy.
 * 
 * @author Aurélien Baudet
 *
 */
public class CharsetMapToCharacterEncodingGroupDataCodingProvider implements DataCodingProvider {
	/**
	 * Default charset name {@literal ->} alphabet association. Built once and
	 * shared by every instance since it is unmodifiable.
	 */
	private static final Map<String, Byte> DEFAULT_ALPHABET_INDEXED_BY_CHARSET_NAME = buildDefaultMap();

	private final boolean failIfUnknown;
	private final Map<String, Byte> alphabetIndexedByCharsetName;

	/**
	 * Provides {@link DataCoding} based on the charset used to encode the
	 * message.
	 * 
	 * <p>
	 * The default map is used (charset name {@literal ->} alphabet):
	 * <ul>
	 * <li>{@link CharsetUtil#NAME_GSM7} {@literal ->}
	 * {@link DataCoding#CHAR_ENC_DEFAULT}</li>
	 * <li>{@link CharsetUtil#NAME_PACKED_GSM} {@literal ->}
	 * {@link DataCoding#CHAR_ENC_DEFAULT}</li>
	 * <li>{@link CharsetUtil#NAME_GSM} {@literal ->}
	 * {@link DataCoding#CHAR_ENC_8BIT}</li>
	 * <li>{@link CharsetUtil#NAME_GSM8} {@literal ->}
	 * {@link DataCoding#CHAR_ENC_8BIT}</li>
	 * <li>{@link CharsetUtil#NAME_ISO_8859_1} {@literal ->}
	 * {@link DataCoding#CHAR_ENC_LATIN1}</li>
	 * <li>{@link CharsetUtil#NAME_UCS_2} {@literal ->}
	 * {@link DataCoding#CHAR_ENC_UCS2}</li>
	 * </ul>
	 * 
	 * @param failIfUnknown
	 *            if true it throws {@link UnsupportedCharsetException}, if
	 *            false it returns null to let other
	 *            {@link DataCodingProvider}(s) be executed.
	 */
	public CharsetMapToCharacterEncodingGroupDataCodingProvider(boolean failIfUnknown) {
		super();
		this.failIfUnknown = failIfUnknown;
		// the default map is already unmodifiable: no copy needed
		this.alphabetIndexedByCharsetName = DEFAULT_ALPHABET_INDEXED_BY_CHARSET_NAME;
	}

	/**
	 * Provides {@link DataCoding} based on the charset used to encode the
	 * message, using a custom association between charset names and alphabet
	 * values.
	 * 
	 * <p>
	 * The provided map is copied: later changes made to it by the caller have
	 * no effect on this provider.
	 * 
	 * @param failIfUnknown
	 *            if true it throws {@link UnsupportedCharsetException}, if
	 *            false it returns null to let other
	 *            {@link DataCodingProvider}(s) be executed.
	 * @param alphabetIndexedByCharsetName
	 *            the alphabet value (bits 3..0 of the Data Coding Scheme)
	 *            indexed by the charset name
	 * @throws IllegalArgumentException
	 *             if the map is null
	 */
	public CharsetMapToCharacterEncodingGroupDataCodingProvider(boolean failIfUnknown, Map<String, Byte> alphabetIndexedByCharsetName) {
		super();
		if (alphabetIndexedByCharsetName == null) {
			throw new IllegalArgumentException("alphabetIndexedByCharsetName must not be null");
		}
		this.failIfUnknown = failIfUnknown;
		this.alphabetIndexedByCharsetName = Collections.unmodifiableMap(new HashMap<>(alphabetIndexedByCharsetName));
	}

	/**
	 * Look up the alphabet associated with the charset of the encoded message
	 * and wrap it in a Character Encoding Group {@link DataCoding}.
	 * 
	 * <p>
	 * The charset name carried by the encoded message is first converted
	 * through {@link NamedCharset#from(String)} and the name exposed by the
	 * resulting {@link NamedCharset} is the key used for the lookup.
	 * 
	 * @param encoded
	 *            the encoded message
	 * @return the Data Coding Scheme, or null if the charset has no associated
	 *         alphabet and this provider is configured not to fail
	 * @throws UnsupportedCharsetException
	 *             if the charset has no associated alphabet and this provider
	 *             is configured to fail
	 */
	@Override
	public DataCoding provide(Encoded encoded) throws DataCodingException {
		NamedCharset charset = NamedCharset.from(encoded.getCharsetName());
		Byte encoding = alphabetIndexedByCharsetName.get(charset.getCharsetName());
		if (encoding != null) {
			return DataCoding.createCharacterEncodingGroup(encoding);
		}
		if (failIfUnknown) {
			throw new UnsupportedCharsetException(encoded.getCharsetName() + " charset not supported for Character Encoding Group Data Coding Scheme", encoded);
		}
		// unknown charset: let another provider decide
		return null;
	}

	/**
	 * Default mapping used to determine {@link DataCoding} encoding value from
	 * {@link Charset}:
	 * <ul>
	 * <li>{@link CharsetUtil#NAME_GSM7} {@literal ->}
	 * {@link DataCoding#CHAR_ENC_DEFAULT}</li>
	 * <li>{@link CharsetUtil#NAME_PACKED_GSM} {@literal ->}
	 * {@link DataCoding#CHAR_ENC_DEFAULT}</li>
	 * <li>{@link CharsetUtil#NAME_GSM} {@literal ->}
	 * {@link DataCoding#CHAR_ENC_8BIT}</li>
	 * <li>{@link CharsetUtil#NAME_GSM8} {@literal ->}
	 * {@link DataCoding#CHAR_ENC_8BIT}</li>
	 * <li>{@link CharsetUtil#NAME_ISO_8859_1} {@literal ->}
	 * {@link DataCoding#CHAR_ENC_LATIN1}</li>
	 * <li>{@link CharsetUtil#NAME_UCS_2} {@literal ->}
	 * {@link DataCoding#CHAR_ENC_UCS2}</li>
	 * </ul>
	 * 
	 * <p>
	 * The returned map is unmodifiable.
	 * 
	 * @return the mapping
	 */
	public static Map<String, Byte> defaultMap() {
		return DEFAULT_ALPHABET_INDEXED_BY_CHARSET_NAME;
	}

	/**
	 * Build the unmodifiable default association between charset names and
	 * alphabet values.
	 * 
	 * @return the unmodifiable default mapping
	 */
	private static Map<String, Byte> buildDefaultMap() {
		Map<String, Byte> map = new HashMap<>();
		// @formatter:off
		map.put(NAME_GSM, 			CHAR_ENC_8BIT);
		map.put(NAME_GSM8, 			CHAR_ENC_8BIT);
		// SMPP v3.3 defines 0 for GSM 7-bit packed.
		// Since SMPP v3.4, meaning of DCS=0 is ambiguous...
		// But it is the only possible value for GSM 7-bit packed 
		map.put(NAME_GSM7, 			CHAR_ENC_DEFAULT);
		map.put(NAME_PACKED_GSM, 	CHAR_ENC_DEFAULT);
		map.put(NAME_ISO_8859_1, 	CHAR_ENC_LATIN1);
		map.put(NAME_UCS_2, 		CHAR_ENC_UCS2);
		// @formatter:on
		// TODO: the other charsets exposed by CharsetUtil (AIRWIDE_GSM,
		// AIRWIDE_IA5, ISO_8859_15, MODIFIED_UTF8, TMOBILENL_GSM, UTF_8,
		// VFD2_GSM, VFTR_GSM) are not mapped: which data coding value should
		// be used for them? Conversely, the other DataCoding.CHAR_ENC_*
		// alphabets (8BITA, CYRLLIC, EXKANJI, HEBREW, IA5, JIS, KSC5601,
		// MUSIC, PICTO, RSRVD, RSRVD2, RSRVD3) have no associated charset
		// here.
		return Collections.unmodifiableMap(map);
	}

}