// JsoupCssInliner.java
package fr.sii.ogham.html.inliner.impl.jsoup;
import static fr.sii.ogham.core.util.HtmlUtils.getCssUrlFunctions;
import static fr.sii.ogham.core.util.HtmlUtils.relativize;
import static fr.sii.ogham.html.inliner.impl.jsoup.CssInlineUtils.isInlineModeAllowed;
import java.util.List;
import java.util.StringTokenizer;
import java.util.regex.Pattern;
import org.jsoup.Jsoup;
import org.jsoup.nodes.DataNode;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.parser.Tag;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import fr.sii.ogham.core.util.CssUrlFunction;
import fr.sii.ogham.html.inliner.CssInliner;
import fr.sii.ogham.html.inliner.CssInlinerConstants.InlineModes;
import fr.sii.ogham.html.inliner.ExternalCss;
public class JsoupCssInliner implements CssInliner {
private static final Logger LOG = LoggerFactory.getLogger(JsoupCssInliner.class);
private static final String HREF_ATTR = "href";
private static final String TEMP_STYLE_ATTR = "data-cssstyle";
private static final String STYLE_ATTR = "style";
private static final String STYLE_TAG = "style";
private static final String CSS_LINKS_SELECTOR = "link[rel*=\"stylesheet\"], link[type=\"text/css\"], link[href$=\".css\"]";
private static final Pattern NEW_LINES = Pattern.compile("\n");
private static final Pattern COMMENTS = Pattern.compile("/\\*.*?\\*/");
private static final Pattern SPACES = Pattern.compile(" +");
private static final String QUOTE_ENTITY = """;
@Override
public String inline(String htmlContent, List<ExternalCss> cssContents) {
Document doc = Jsoup.parse(htmlContent);
internStyles(doc, cssContents);
String stylesheet = fetchStyles(doc);
extractStyles(doc, stylesheet);
applyStyles(doc);
return doc.outerHtml();
}
/**
* Applies the styles to a <code>data-cssstyle</code> attribute. This is
* because the styles need to be applied sequentially, but before the
* <code>style</code> defined for the element inline.
*
* @param doc
* the html document
*/
private static void extractStyles(Document doc, String stylesheet) {
String cleanedStylesheet = ignoreAtRules(stylesheet);
cleanedStylesheet = NEW_LINES.matcher(cleanedStylesheet).replaceAll("");
cleanedStylesheet = COMMENTS.matcher(cleanedStylesheet).replaceAll("");
cleanedStylesheet = SPACES.matcher(cleanedStylesheet).replaceAll(" ");
String styleRules = cleanedStylesheet.trim();
String delims = "{}";
StringTokenizer st = new StringTokenizer(styleRules, delims);
while (st.countTokens() > 1) {
String selector = st.nextToken();
String properties = st.nextToken();
Elements selectedElements = doc.select(selector.trim());
for (Element selElem : selectedElements) {
String oldProperties = selElem.attr(TEMP_STYLE_ATTR);
selElem.attr(TEMP_STYLE_ATTR, oldProperties.length() > 0 ? concatenateProperties(oldProperties, properties) : properties);
}
}
}
/**
* Replace link tags with style tags in order to keep the same inclusion
* order
*
* @param doc
* the html document
* @param cssContents
* the list of external css files with their content
*/
private static void internStyles(Document doc, List<ExternalCss> cssContents) {
Elements els = doc.select(CSS_LINKS_SELECTOR);
for (Element e : els) {
if (isInlineModeAllowed(e, InlineModes.STYLE_ATTR)) {
String path = e.attr(HREF_ATTR);
ExternalCss css = getCss(cssContents, path);
if (css != null) {
Element style = new Element(Tag.valueOf(STYLE_TAG), "");
style.appendChild(new DataNode(getCssContent(css)));
e.replaceWith(style);
}
}
}
}
private static ExternalCss getCss(List<ExternalCss> cssContents, String path) {
for (ExternalCss css : cssContents) {
if (css.getPath().getOriginalPath().contains(path)) {
return css;
}
}
return null;
}
/**
* Generates a stylesheet from an html document
*
* @param doc
* the html document
* @return a string representing the stylesheet.
*/
private static String fetchStyles(Document doc) {
Elements els = doc.select(STYLE_TAG);
StringBuilder styles = new StringBuilder();
for (Element e : els) {
if (isInlineModeAllowed(e, InlineModes.STYLE_ATTR)) {
styles.append(e.data());
e.remove();
}
}
return styles.toString();
}
/**
* Transfers styles from the <code>data-cssstyle</code> attribute to the
* <code>style</code> attribute.
*
* @param doc
* the html document
*/
private static void applyStyles(Document doc) {
Elements allStyledElements = doc.getElementsByAttribute(TEMP_STYLE_ATTR);
for (Element e : allStyledElements) {
if (isInlineModeAllowed(e, InlineModes.STYLE_ATTR)) {
String newStyle = e.attr(TEMP_STYLE_ATTR);
String oldStyle = e.attr(STYLE_ATTR);
e.attr(STYLE_ATTR, (trimAll(newStyle) + ";" + trimAll(oldStyle)).replaceAll(";+", ";").trim());
}
e.removeAttr(TEMP_STYLE_ATTR);
}
}
private static String concatenateProperties(String oldProp, String newProp) {
String prop = oldProp;
if (!prop.endsWith(";")) {
prop += ";";
}
return trimAll(prop) + " " + trimAll(newProp) + ";";
}
private static String trimAll(String str) {
return str.replaceAll("\\s+", " ").trim();
}
private static String ignoreAtRules(String stylesheet) {
StringBuilder sb = new StringBuilder();
AtRuleParserContext ctx = new AtRuleParserContext();
for (int i=0 ; i<stylesheet.length() ; i++) {
char c = stylesheet.charAt(i);
updateLineNumberIfNewLine(ctx, c);
markAsStartOfAtRuleIfAtChar(ctx, c);
markAsStartOfNestedAtRuleIfAlreadyInAtRuleAndIsOpeningBracket(ctx, c);
markAsEndOfNestedAtRuleIfAlreadyInAtRuleAndIsClosingBracket(ctx, c);
if (ignoreAtRuleIfAtEndOfAtRule(ctx, c)) {
continue;
}
updateStylesAndAtRuleContent(ctx, sb, c);
}
return sb.toString();
}
private static boolean ignoreAtRuleIfAtEndOfAtRule(AtRuleParserContext ctx, char c) {
if (ctx.inAtRule && !ctx.inNestedAtRule && c == ';') {
ctx.inAtRule = false;
LOG.warn("{} rule is not handled by JsoupCssInliner implementation. Line {}:'{}' is skipped", rulename(ctx.rule), ctx.startLineOfCurrentAtRule, ctx.rule);
return true;
}
if (ctx.inAtRule && ctx.inNestedAtRule && ctx.numberOfOpenedAtRules == 0) {
ctx.inAtRule = false;
ctx.inNestedAtRule = false;
LOG.warn("{} rule is not handled by JsoupCssInliner implementation. Lines {}-{} are skipped", rulename(ctx.rule), ctx.startLineOfCurrentAtRule, ctx.line);
return true;
}
return false;
}
private static void updateStylesAndAtRuleContent(AtRuleParserContext ctx, StringBuilder sb, char c) {
if (!ctx.inAtRule) {
sb.append(c);
ctx.rule = new StringBuilder();
} else {
ctx.rule.append(c);
}
}
private static void markAsEndOfNestedAtRuleIfAlreadyInAtRuleAndIsClosingBracket(AtRuleParserContext ctx, char c) {
if (ctx.inAtRule && ctx.inNestedAtRule && c == '}') {
ctx.numberOfOpenedAtRules--;
}
}
private static void markAsStartOfNestedAtRuleIfAlreadyInAtRuleAndIsOpeningBracket(AtRuleParserContext ctx, char c) {
if (ctx.inAtRule && c == '{') {
ctx.inNestedAtRule = true;
ctx.numberOfOpenedAtRules++;
}
}
private static void markAsStartOfAtRuleIfAtChar(AtRuleParserContext ctx, char c) {
if (c == '@' && !ctx.inAtRule) {
ctx.inAtRule = true;
ctx.startLineOfCurrentAtRule = ctx.line;
}
}
private static void updateLineNumberIfNewLine(AtRuleParserContext ctx, char c) {
if (c == '\n') {
ctx.line++;
}
}
private static String rulename(StringBuilder rule) {
StringBuilder name = new StringBuilder();
for (int i=0 ; i<rule.length() ; i++) {
char c = rule.charAt(i);
if (c != '@' && c != '-' && !Character.isAlphabetic(c) && !Character.isDigit(c)) {
break;
}
name.append(c);
}
return name.toString();
}
private static String getCssContent(ExternalCss css) {
String content = css.getContent();
return updateRelativeUrls(content, css);
}
private static String updateRelativeUrls(String content, ExternalCss css) {
String newContent = content;
for (CssUrlFunction match : getCssUrlFunctions(content, QUOTE_ENTITY)) {
newContent = match.rewriteUrl(newContent, relativize(css.getPath().getOriginalPath(), match.getUrl()));
}
return newContent;
}
private static class AtRuleParserContext {
protected int line;
protected int startLineOfCurrentAtRule;
protected boolean inAtRule;
protected boolean inNestedAtRule;
protected int numberOfOpenedAtRules;
protected StringBuilder rule;
public AtRuleParserContext() {
super();
this.line = 1;
this.startLineOfCurrentAtRule = 0;
this.inAtRule = false;
this.inNestedAtRule = false;
this.numberOfOpenedAtRules = 0;
this.rule = new StringBuilder();
}
}
}