| 1 | package fr.sii.ogham.core.util; | |
| 2 | ||
| 3 | import static java.util.Arrays.asList; | |
| 4 | ||
| 5 | import java.net.URI; | |
| 6 | import java.net.URISyntaxException; | |
| 7 | import java.nio.file.Path; | |
| 8 | import java.nio.file.Paths; | |
| 9 | import java.util.ArrayList; | |
| 10 | import java.util.Iterator; | |
| 11 | import java.util.List; | |
| 12 | import java.util.StringJoiner; | |
| 13 | import java.util.regex.Matcher; | |
| 14 | import java.util.regex.Pattern; | |
| 15 | ||
| 16 | import org.jsoup.Jsoup; | |
| 17 | import org.jsoup.nodes.Document; | |
| 18 | import org.jsoup.nodes.Element; | |
| 19 | import org.jsoup.select.Elements; | |
| 20 | import org.slf4j.Logger; | |
| 21 | import org.slf4j.LoggerFactory; | |
| 22 | ||
| 23 | /** | |
| 24 | * Utility class for handling HTML content. It helps with repetitive HTML | |
| 25 | * manipulation tasks. | |
| 26 | * | |
| 27 | * @author Aurélien Baudet | |
| 28 | * | |
| 29 | */ | |
| 30 | public final class HtmlUtils { | |
| 31 | private static final Logger LOG = LoggerFactory.getLogger(HtmlUtils.class); | |
| 32 | ||
| 33 | private static final Pattern HTML_PATTERN = Pattern.compile("<html", Pattern.CASE_INSENSITIVE); | |
| 34 | private static final String CSS_LINKS_SELECTOR = "link[rel*=\"stylesheet\"], link[type=\"text/css\"], link[href$=\".css\"]"; | |
| 35 | private static final String HREF_ATTR = "href"; | |
| 36 | private static final String IMG_SELECTOR = "img"; | |
| 37 | private static final String SRC_ATTR = "src"; | |
| 38 | private static final Pattern URL_PATTERN = Pattern.compile("^https?://.+$", Pattern.CASE_INSENSITIVE); | |
| 39 | private static final Pattern URI_INVALID_CHARS = Pattern.compile("\\\\'"); | |
| 40 | private static final String URI_ESCAPE = "''"; | |
| 41 | private static final Pattern QUOTE_ENTITY = Pattern.compile("""); | |
| 42 | private static final String UNQUOTED_FORM = "(?<startunquoted>\\s*url\\s*[(]\\s*)(?<urlunquoted>(?:\\\\[()\\s]|[^()\\s])+)(?<endunquoted>\\s*[)]\\s*(?:[\\s;,'\"]|$))"; | |
| 43 | private static final String QUOTED_FORM = "(?<start#QUOTENAME#>\\s*url\\s*[(]\\s*)(?<quote#QUOTENAME#>#QUOTE#)(?<url#QUOTENAME#>(?:\\\\#QUOTE#|(?!#QUOTE#).)+)#QUOTE#(?<end#QUOTENAME#>\\s*[)]\\s*(?:[\\s;,'\"]|$))"; | |
| 44 | ||
| 45 | /** | |
| 46 | * Regular expression that matches CSS properties for image inclusions such | |
| 47 | * as: | |
| 48 | * <ul> | |
| 49 | * <li>{@code background: <value>;}</li> | |
| 50 | * <li>{@code background-image: <value>};</li> | |
| 51 | * <li>{@code list-style: <value>};</li> | |
| 52 | * <li>{@code list-style-image: <value>};</li> | |
| 53 | * <li>{@code cursor: <value>};</li> | |
| 54 | * </ul> | |
| 55 | * | |
| 56 | * <p> | |
| 57 | * The pattern provides the following named capturing groups: | |
| 58 | * <ul> | |
| 59 | * <li>{@code "property"}: matches the property part (property name, spaces | |
| 60 | * and {@literal :})</li> | |
| 61 | * <li>{@code "propertyname"}: matches the property name (such as | |
| 62 | * {@code background})</li> | |
| 63 | * <li>{@code "value"}: matches the property value (without final | |
| 64 | * {@literal ;})</li> | |
| 65 | * </ul> | |
| 66 | */ | |
| 67 | public static final Pattern CSS_IMAGE_PROPERTIES_PATTERN = Pattern.compile("(?<property>(?<propertyname>((background|list-style)(-image)?)|cursor)\\s*:)(?<value>[^;}>]+)", | |
| 68 | Pattern.MULTILINE | Pattern.DOTALL | Pattern.CASE_INSENSITIVE); | |
| 69 | ||
| 70 | /** | |
| 71 | * Indicates if the provided content is HTML or not. It is considered HTML | |
| 72 | * only if it is a whole document. Any partial HTML content won't be | |
| 73 | * considered as HTML. | |
| 74 | * | |
| 75 | * @param content | |
| 76 | * the content to test | |
| 77 | * @return true if it is HTML, false otherwise | |
| 78 | */ | |
| 79 | public static boolean isHtml(String content) { | |
| 80 |
9
1. isHtml : replaced boolean return with false for fr/sii/ogham/core/util/HtmlUtils::isHtml → SURVIVED 2. isHtml : replaced boolean return with false for fr/sii/ogham/core/util/HtmlUtils::isHtml → NO_COVERAGE 3. isHtml : replaced boolean return with true for fr/sii/ogham/core/util/HtmlUtils::isHtml → SURVIVED 4. isHtml : replaced boolean return with true for fr/sii/ogham/core/util/HtmlUtils::isHtml → NO_COVERAGE 5. isHtml : replaced boolean return with false for fr/sii/ogham/core/util/HtmlUtils::isHtml → TIMED_OUT 6. isHtml : replaced boolean return with true for fr/sii/ogham/core/util/HtmlUtils::isHtml → TIMED_OUT 7. isHtml : replaced boolean return with false for fr/sii/ogham/core/util/HtmlUtils::isHtml → KILLED 8. isHtml : replaced boolean return with false for fr/sii/ogham/core/util/HtmlUtils::isHtml → KILLED 9. isHtml : replaced boolean return with true for fr/sii/ogham/core/util/HtmlUtils::isHtml → KILLED |
return HTML_PATTERN.matcher(content).find(); |
| 81 | } | |
| 82 | ||
| 83 | /** | |
| 84 | * Finds all CSS file inclusions (looks for <code>link</code> tags for | |
| 85 | * stylesheet files). Returns only the path or URL to the CSS file. If the | |
| 86 | * several CSS inclusions have the same path, the path is present in the | |
| 87 | * list only one time. | |
| 88 | * | |
| 89 | * @param htmlContent | |
| 90 | * the html content that may contain external CSS files | |
| 91 | * @return the list of found CSS inclusions (paths only) or empty if nothing | |
| 92 | * found | |
| 93 | */ | |
| 94 | public static List<String> getDistinctCssUrls(String htmlContent) { | |
| 95 | Document doc = Jsoup.parse(htmlContent); | |
| 96 | Elements els = doc.select(CSS_LINKS_SELECTOR); | |
| 97 | List<String> cssFiles = new ArrayList<>(els.size()); | |
| 98 | for (Element e : els) { | |
| 99 | String path = e.attr(HREF_ATTR); | |
| 100 |
3
1. getDistinctCssUrls : negated conditional → NO_COVERAGE 2. getDistinctCssUrls : negated conditional → KILLED 3. getDistinctCssUrls : negated conditional → KILLED |
if (!cssFiles.contains(path)) { |
| 101 | cssFiles.add(path); | |
| 102 | } | |
| 103 | } | |
| 104 |
4
1. getDistinctCssUrls : replaced return value with Collections.emptyList for fr/sii/ogham/core/util/HtmlUtils::getDistinctCssUrls → NO_COVERAGE 2. getDistinctCssUrls : replaced return value with Collections.emptyList for fr/sii/ogham/core/util/HtmlUtils::getDistinctCssUrls → TIMED_OUT 3. getDistinctCssUrls : replaced return value with Collections.emptyList for fr/sii/ogham/core/util/HtmlUtils::getDistinctCssUrls → KILLED 4. getDistinctCssUrls : replaced return value with Collections.emptyList for fr/sii/ogham/core/util/HtmlUtils::getDistinctCssUrls → KILLED |
return cssFiles; |
| 105 | } | |
| 106 | ||
| 107 | /** | |
| 108 | * Finds all image inclusions (looks for <code>img</code> tags). Returns | |
| 109 | * only the path or URL to the image. If the several images have the same | |
| 110 | * path, the path is present in the list only one time. | |
| 111 | * | |
| 112 | * @param htmlContent | |
| 113 | * the html content that may contain image files | |
| 114 | * @return the list of found images (paths only) or empty if nothing found | |
| 115 | */ | |
| 116 | public static List<String> getDistinctImageUrls(String htmlContent) { | |
| 117 | Document doc = Jsoup.parse(htmlContent); | |
| 118 | Elements els = doc.select(IMG_SELECTOR); | |
| 119 | List<String> images = new ArrayList<>(els.size()); | |
| 120 | for (Element e : els) { | |
| 121 | String path = e.attr(SRC_ATTR); | |
| 122 |
3
1. getDistinctImageUrls : negated conditional → NO_COVERAGE 2. getDistinctImageUrls : negated conditional → KILLED 3. getDistinctImageUrls : negated conditional → KILLED |
if (!images.contains(path)) { |
| 123 | images.add(path); | |
| 124 | } | |
| 125 | } | |
| 126 |
5
1. getDistinctImageUrls : replaced return value with Collections.emptyList for fr/sii/ogham/core/util/HtmlUtils::getDistinctImageUrls → SURVIVED 2. getDistinctImageUrls : replaced return value with Collections.emptyList for fr/sii/ogham/core/util/HtmlUtils::getDistinctImageUrls → NO_COVERAGE 3. getDistinctImageUrls : replaced return value with Collections.emptyList for fr/sii/ogham/core/util/HtmlUtils::getDistinctImageUrls → TIMED_OUT 4. getDistinctImageUrls : replaced return value with Collections.emptyList for fr/sii/ogham/core/util/HtmlUtils::getDistinctImageUrls → KILLED 5. getDistinctImageUrls : replaced return value with Collections.emptyList for fr/sii/ogham/core/util/HtmlUtils::getDistinctImageUrls → KILLED |
return images; |
| 127 | } | |
| 128 | ||
| 129 | /** | |
| 130 | * Finds all image inclusions from CSS properties. Returns only the path or | |
| 131 | * URL to the image. If the several images have the same path, the path is | |
| 132 | * present in the list only one time. | |
| 133 | * | |
| 134 | * <p> | |
| 135 | * It looks for: | |
| 136 | * <ul> | |
| 137 | * <li><code>background</code></li> | |
| 138 | * <li><code>background-image</code></li> | |
| 139 | * <li><code>list-style</code></li> | |
| 140 | * <li><code>list-style-image</code></li> | |
| 141 | * <li><code>cursor</code></li> | |
| 142 | * </ul> | |
| 143 | * | |
| 144 | * @param htmlContent | |
| 145 | * the html content that may contain image files | |
| 146 | * @return the list of found images (paths only) or empty if nothing found | |
| 147 | */ | |
| 148 | public static List<String> getDistinctCssImageUrls(String htmlContent) { | |
| 149 | List<String> urls = new ArrayList<>(); | |
| 150 | Matcher m = CSS_IMAGE_PROPERTIES_PATTERN.matcher(QUOTE_ENTITY.matcher(htmlContent).replaceAll("'")); | |
| 151 |
5
1. getDistinctCssImageUrls : negated conditional → NO_COVERAGE 2. getDistinctCssImageUrls : negated conditional → TIMED_OUT 3. getDistinctCssImageUrls : negated conditional → KILLED 4. getDistinctCssImageUrls : negated conditional → KILLED 5. getDistinctCssImageUrls : negated conditional → KILLED |
while (m.find()) { |
| 152 | for (CssUrlFunction url : getCssUrlFunctions(m.group("value"))) { | |
| 153 |
3
1. getDistinctCssImageUrls : negated conditional → NO_COVERAGE 2. getDistinctCssImageUrls : negated conditional → KILLED 3. getDistinctCssImageUrls : negated conditional → KILLED |
if (!urls.contains(url.getUrl())) { |
| 154 | urls.add(url.getUrl()); | |
| 155 | } | |
| 156 | } | |
| 157 | } | |
| 158 |
5
1. getDistinctCssImageUrls : replaced return value with Collections.emptyList for fr/sii/ogham/core/util/HtmlUtils::getDistinctCssImageUrls → NO_COVERAGE 2. getDistinctCssImageUrls : replaced return value with Collections.emptyList for fr/sii/ogham/core/util/HtmlUtils::getDistinctCssImageUrls → SURVIVED 3. getDistinctCssImageUrls : replaced return value with Collections.emptyList for fr/sii/ogham/core/util/HtmlUtils::getDistinctCssImageUrls → TIMED_OUT 4. getDistinctCssImageUrls : replaced return value with Collections.emptyList for fr/sii/ogham/core/util/HtmlUtils::getDistinctCssImageUrls → KILLED 5. getDistinctCssImageUrls : replaced return value with Collections.emptyList for fr/sii/ogham/core/util/HtmlUtils::getDistinctCssImageUrls → KILLED |
return urls; |
| 159 | } | |
| 160 | ||
| 161 | /** | |
| 162 | * Parse the CSS property value that may contain one or several | |
| 163 | * {@code url()} CSS function(s). | |
| 164 | * | |
| 165 | * Each element of the returned list provides the following information: | |
| 166 | * <ul> | |
| 167 | * <li>{@code "source"}: the whole match of the {@code url()} function</li> | |
| 168 | * <li>{@code "start"}: matches the {@code url(} part (without quote, spaces | |
| 169 | * are preserved)</li> | |
| 170 | * <li>{@code "end"}: matches the {@code )} part (without quote, spaces are | |
| 171 | * preserved)</li> | |
| 172 | * <li>{@code "url"}: the url (without surrounding quotes)</li> | |
| 173 | * <li>{@code "enclosingQuoteChar"}: either {@literal "} character, | |
| 174 | * {@literal '} character or empty string</li> | |
| 175 | * </ul> | |
| 176 | * | |
| 177 | * <strong>WARNING:</strong> This function doesn't attempt to validate the | |
| 178 | * URL at all. It just extracts the different parts for later parsing. If | |
| 179 | * either the URL or CSS property value or the {@code url()} function is | |
| 180 | * invalid, it may still return a value because it depends on the parsing | |
| 181 | * context. It may then return an invalid form. For example | |
| 182 | * {@code url('images/h'1.gif')} is not valid due to unscaped single quote, | |
| 183 | * however this method will return a result with {@code images/h'1.gif} as | |
| 184 | * URL. | |
| 185 | * | |
| 186 | * @param cssPropertyValue | |
| 187 | * the value of the CSS property | |
| 188 | * @param additionalEnclosingQuotes | |
| 189 | * allow additional forms such as | |
| 190 | * {@code url("http://some-url")} that may be used in | |
| 191 | * style attribute | |
| 192 | * @return the list of meta information about the matched urls | |
| 193 | */ | |
| 194 | public static List<CssUrlFunction> getCssUrlFunctions(String cssPropertyValue, String... additionalEnclosingQuotes) { | |
| 195 | List<String> possibleQuotes = new ArrayList<>(asList("'", "\"")); | |
| 196 | possibleQuotes.addAll(asList(additionalEnclosingQuotes)); | |
| 197 | Pattern cssUrlFuncPattern = generateUrlFuncPattern(possibleQuotes); | |
| 198 | List<CssUrlFunction> urls = new ArrayList<>(); | |
| 199 | Matcher urlMatcher = cssUrlFuncPattern.matcher(cssPropertyValue); | |
| 200 |
3
1. getCssUrlFunctions : negated conditional → NO_COVERAGE 2. getCssUrlFunctions : negated conditional → KILLED 3. getCssUrlFunctions : negated conditional → KILLED |
while (urlMatcher.find()) { |
| 201 | CssUrlFunction url = null; | |
| 202 |
6
1. getCssUrlFunctions : changed conditional boundary → SURVIVED 2. getCssUrlFunctions : changed conditional boundary → NO_COVERAGE 3. getCssUrlFunctions : negated conditional → NO_COVERAGE 4. getCssUrlFunctions : changed conditional boundary → KILLED 5. getCssUrlFunctions : negated conditional → KILLED 6. getCssUrlFunctions : negated conditional → KILLED |
for (int i = 0; i < possibleQuotes.size(); i++) { |
| 203 |
3
1. getCssUrlFunctions : negated conditional → NO_COVERAGE 2. getCssUrlFunctions : negated conditional → KILLED 3. getCssUrlFunctions : negated conditional → KILLED |
if (urlMatcher.group("quotedform" + i) != null) { |
| 204 | url = new CssUrlFunction(urlMatcher.group("quotedform" + i), urlMatcher.group("start" + i), urlMatcher.group("url" + i), urlMatcher.group("end" + i), possibleQuotes.get(i)); | |
| 205 | break; | |
| 206 | } | |
| 207 | } | |
| 208 |
3
1. getCssUrlFunctions : negated conditional → NO_COVERAGE 2. getCssUrlFunctions : negated conditional → KILLED 3. getCssUrlFunctions : negated conditional → KILLED |
if (urlMatcher.group("unquotedform") != null) { |
| 209 | url = new CssUrlFunction(urlMatcher.group("unquotedform"), urlMatcher.group("startunquoted"), urlMatcher.group("urlunquoted"), urlMatcher.group("endunquoted"), ""); | |
| 210 | } | |
| 211 |
3
1. getCssUrlFunctions : negated conditional → NO_COVERAGE 2. getCssUrlFunctions : negated conditional → KILLED 3. getCssUrlFunctions : negated conditional → KILLED |
if (url != null) { |
| 212 | urls.add(url); | |
| 213 | } | |
| 214 | } | |
| 215 |
3
1. getCssUrlFunctions : replaced return value with Collections.emptyList for fr/sii/ogham/core/util/HtmlUtils::getCssUrlFunctions → NO_COVERAGE 2. getCssUrlFunctions : replaced return value with Collections.emptyList for fr/sii/ogham/core/util/HtmlUtils::getCssUrlFunctions → KILLED 3. getCssUrlFunctions : replaced return value with Collections.emptyList for fr/sii/ogham/core/util/HtmlUtils::getCssUrlFunctions → KILLED |
return urls; |
| 216 | } | |
| 217 | ||
| 218 | /** | |
| 219 | * Get the title of the HTML. If no <code>title</code> tag exists, then the | |
| 220 | * title is null. | |
| 221 | * | |
| 222 | * @param htmlContent | |
| 223 | * the HTML content that may contain a title | |
| 224 | * @return the title of the HTML or null if none | |
| 225 | */ | |
| 226 | public static String getTitle(String htmlContent) { | |
| 227 | Document doc = Jsoup.parse(htmlContent); | |
| 228 | Elements titleNode = doc.select("head > title"); | |
| 229 |
8
1. getTitle : replaced return value with "" for fr/sii/ogham/core/util/HtmlUtils::getTitle → NO_COVERAGE 2. getTitle : negated conditional → NO_COVERAGE 3. getTitle : replaced return value with "" for fr/sii/ogham/core/util/HtmlUtils::getTitle → TIMED_OUT 4. getTitle : negated conditional → TIMED_OUT 5. getTitle : replaced return value with "" for fr/sii/ogham/core/util/HtmlUtils::getTitle → KILLED 6. getTitle : replaced return value with "" for fr/sii/ogham/core/util/HtmlUtils::getTitle → KILLED 7. getTitle : negated conditional → KILLED 8. getTitle : negated conditional → KILLED |
return titleNode.isEmpty() ? null : doc.title(); |
| 230 | } | |
| 231 | ||
| 232 | /** | |
| 233 | * The list of provided URLs are either relative or absolute. This method | |
| 234 | * returns only the list of relative URLs. | |
| 235 | * | |
| 236 | * <p> | |
| 237 | * The URL is considered absolute if it starts with {@code "http://"} or | |
| 238 | * {@code https://}. | |
| 239 | * | |
| 240 | * | |
| 241 | * @param urls | |
| 242 | * the urls (relative or absolute) | |
| 243 | * @return the relative urls only | |
| 244 | */ | |
| 245 | public static List<String> skipExternalUrls(List<String> urls) { | |
| 246 |
5
1. skipExternalUrls : negated conditional → NO_COVERAGE 2. skipExternalUrls : negated conditional → TIMED_OUT 3. skipExternalUrls : negated conditional → KILLED 4. skipExternalUrls : negated conditional → KILLED 5. skipExternalUrls : negated conditional → KILLED |
for (Iterator<String> it = urls.iterator(); it.hasNext();) { |
| 247 | String url = it.next(); | |
| 248 |
3
1. skipExternalUrls : negated conditional → NO_COVERAGE 2. skipExternalUrls : negated conditional → KILLED 3. skipExternalUrls : negated conditional → KILLED |
if (URL_PATTERN.matcher(url).matches()) { |
| 249 |
3
1. skipExternalUrls : removed call to java/util/Iterator::remove → NO_COVERAGE 2. skipExternalUrls : removed call to java/util/Iterator::remove → KILLED 3. skipExternalUrls : removed call to java/util/Iterator::remove → KILLED |
it.remove(); |
| 250 | } | |
| 251 | } | |
| 252 |
5
1. skipExternalUrls : replaced return value with Collections.emptyList for fr/sii/ogham/core/util/HtmlUtils::skipExternalUrls → NO_COVERAGE 2. skipExternalUrls : replaced return value with Collections.emptyList for fr/sii/ogham/core/util/HtmlUtils::skipExternalUrls → SURVIVED 3. skipExternalUrls : replaced return value with Collections.emptyList for fr/sii/ogham/core/util/HtmlUtils::skipExternalUrls → TIMED_OUT 4. skipExternalUrls : replaced return value with Collections.emptyList for fr/sii/ogham/core/util/HtmlUtils::skipExternalUrls → KILLED 5. skipExternalUrls : replaced return value with Collections.emptyList for fr/sii/ogham/core/util/HtmlUtils::skipExternalUrls → KILLED |
return urls; |
| 253 | } | |
| 254 | ||
| 255 | /** | |
| 256 | * Generate a relative URL/path: | |
| 257 | * <ul> | |
| 258 | * <li>If {@code other} parameter is absolute, then return | |
| 259 | * {@code other}.</li> | |
| 260 | * <li>If {@code other} parameter is relative, then it merges {@code other} | |
| 261 | * into {@code base}. For example: | |
| 262 | * <ul> | |
| 263 | * <li>base="css/foo.css", other="bar.png" {@literal =>} returns | |
| 264 | * "css/bar.png"</li> | |
| 265 | * <li>base="css/foo.css", other="../images/bar.png" {@literal =>} returns | |
| 266 | * "images/bar.png"</li> | |
| 267 | * <li>base="http://some-url/css/foo.css", other="bar.png" {@literal =>} | |
| 268 | * returns "http://some-url/css/bar.png"</li> | |
| 269 | * <li>base="http://some-url/css/foo.css", other="../images/bar.png" | |
| 270 | * {@literal =>} returns "http://some-url/images/bar.png"</li> | |
| 271 | * </ul> | |
| 272 | * </li> | |
| 273 | * </ul> | |
| 274 | * | |
| 275 | * <p> | |
| 276 | * This method uses {@link #isRelativeUrl(String)} to determine if | |
| 277 | * {@code other} is relative or absolute. | |
| 278 | * | |
| 279 | * @param base | |
| 280 | * the base path/URL | |
| 281 | * @param other | |
| 282 | * the path/URL to relativize | |
| 283 | * @return the merge path/URL | |
| 284 | */ | |
| 285 | public static String relativize(String base, String other) { | |
| 286 |
3
1. relativize : negated conditional → NO_COVERAGE 2. relativize : negated conditional → KILLED 3. relativize : negated conditional → KILLED |
if (!isRelativeUrl(other)) { |
| 287 |
2
1. relativize : replaced return value with "" for fr/sii/ogham/core/util/HtmlUtils::relativize → NO_COVERAGE 2. relativize : replaced return value with "" for fr/sii/ogham/core/util/HtmlUtils::relativize → KILLED |
return other; |
| 288 | } | |
| 289 | Path basePath = Paths.get(base); | |
| 290 |
3
1. relativize : replaced return value with "" for fr/sii/ogham/core/util/HtmlUtils::relativize → NO_COVERAGE 2. relativize : replaced return value with "" for fr/sii/ogham/core/util/HtmlUtils::relativize → KILLED 3. relativize : replaced return value with "" for fr/sii/ogham/core/util/HtmlUtils::relativize → KILLED |
return unescapeJavaUri(ResourceUtils.toResourcePath(basePath.resolveSibling(escapeForJavaUri(other)).normalize())); |
| 291 | } | |
| 292 | ||
| 293 | /** | |
| 294 | * Indicates if the URL is relative or not. | |
| 295 | * | |
| 296 | * <p> | |
| 297 | * Relative URLs may be: | |
| 298 | * <ul> | |
| 299 | * <li>{@code "relative/path"}</li> | |
| 300 | * <li>{@code "./relative/path"}</li> | |
| 301 | * <li>{@code "../relative/path"}</li> | |
| 302 | * </ul> | |
| 303 | * | |
| 304 | * <p> | |
| 305 | * On the contrary, any URL that matches one of the following condition is | |
| 306 | * absolute: | |
| 307 | * <ul> | |
| 308 | * <li>starts with a scheme or protocol (like {@code "http://"} or | |
| 309 | * {@code "classpath:"}</li> | |
| 310 | * <li>starts with a {@code "/"}</li> | |
| 311 | * </ul> | |
| 312 | * | |
| 313 | * @param url | |
| 314 | * the URL that may be relative or absolute | |
| 315 | * @return true if relative | |
| 316 | */ | |
| 317 | public static boolean isRelativeUrl(String url) { | |
| 318 | try { | |
| 319 |
3
1. isRelativeUrl : negated conditional → NO_COVERAGE 2. isRelativeUrl : negated conditional → KILLED 3. isRelativeUrl : negated conditional → KILLED |
if (url.startsWith("/")) { |
| 320 |
2
1. isRelativeUrl : replaced boolean return with true for fr/sii/ogham/core/util/HtmlUtils::isRelativeUrl → SURVIVED 2. isRelativeUrl : replaced boolean return with true for fr/sii/ogham/core/util/HtmlUtils::isRelativeUrl → NO_COVERAGE |
return false; |
| 321 | } | |
| 322 | URI u = new URI(escapeForJavaUri(url)); | |
| 323 |
6
1. isRelativeUrl : replaced boolean return with true for fr/sii/ogham/core/util/HtmlUtils::isRelativeUrl → NO_COVERAGE 2. isRelativeUrl : replaced boolean return with true for fr/sii/ogham/core/util/HtmlUtils::isRelativeUrl → SURVIVED 3. isRelativeUrl : negated conditional → NO_COVERAGE 4. isRelativeUrl : replaced boolean return with true for fr/sii/ogham/core/util/HtmlUtils::isRelativeUrl → KILLED 5. isRelativeUrl : negated conditional → KILLED 6. isRelativeUrl : negated conditional → KILLED |
return !u.isAbsolute(); |
| 324 | } catch (URISyntaxException e) { | |
| 325 | LOG.warn("Can't determine if '{}' url is relative or absolute => consider absolute", url); | |
| 326 | LOG.trace("", e); | |
| 327 |
1
1. isRelativeUrl : replaced boolean return with true for fr/sii/ogham/core/util/HtmlUtils::isRelativeUrl → NO_COVERAGE |
return false; |
| 328 | } | |
| 329 | } | |
| 330 | ||
| 331 | private static String escapeForJavaUri(String url) { | |
| 332 |
3
1. escapeForJavaUri : replaced return value with "" for fr/sii/ogham/core/util/HtmlUtils::escapeForJavaUri → NO_COVERAGE 2. escapeForJavaUri : replaced return value with "" for fr/sii/ogham/core/util/HtmlUtils::escapeForJavaUri → KILLED 3. escapeForJavaUri : replaced return value with "" for fr/sii/ogham/core/util/HtmlUtils::escapeForJavaUri → KILLED |
return URI_INVALID_CHARS.matcher(url).replaceAll(URI_ESCAPE); |
| 333 | } | |
| 334 | ||
| 335 | @SuppressWarnings({ "java:S5361", "squid:S5361" }) | |
| 336 | private static String unescapeJavaUri(String url) { | |
| 337 |
3
1. unescapeJavaUri : replaced return value with "" for fr/sii/ogham/core/util/HtmlUtils::unescapeJavaUri → NO_COVERAGE 2. unescapeJavaUri : replaced return value with "" for fr/sii/ogham/core/util/HtmlUtils::unescapeJavaUri → KILLED 3. unescapeJavaUri : replaced return value with "" for fr/sii/ogham/core/util/HtmlUtils::unescapeJavaUri → KILLED |
return url.replaceAll(URI_ESCAPE, URI_INVALID_CHARS.pattern()); |
| 338 | } | |
| 339 | ||
| 340 | private static Pattern generateUrlFuncPattern(List<String> possibleQuotes) { | |
| 341 | StringJoiner joiner = new StringJoiner("|"); | |
| 342 | int i = 0; | |
| 343 | for (String possibleQuote : possibleQuotes) { | |
| 344 | joiner.add("(?<quotedform" + i + ">" + QUOTED_FORM.replace("#QUOTE#", Pattern.quote(possibleQuote)).replace("#QUOTENAME#", i + "") + ")"); | |
| 345 |
3
1. generateUrlFuncPattern : Changed increment from 1 to -1 → NO_COVERAGE 2. generateUrlFuncPattern : Changed increment from 1 to -1 → KILLED 3. generateUrlFuncPattern : Changed increment from 1 to -1 → KILLED |
i++; |
| 346 | } | |
| 347 | joiner.add("(?<unquotedform>" + UNQUOTED_FORM + ")"); | |
| 348 |
3
1. generateUrlFuncPattern : replaced return value with null for fr/sii/ogham/core/util/HtmlUtils::generateUrlFuncPattern → NO_COVERAGE 2. generateUrlFuncPattern : replaced return value with null for fr/sii/ogham/core/util/HtmlUtils::generateUrlFuncPattern → KILLED 3. generateUrlFuncPattern : replaced return value with null for fr/sii/ogham/core/util/HtmlUtils::generateUrlFuncPattern → KILLED |
return Pattern.compile(joiner.toString(), Pattern.MULTILINE); |
| 349 | } | |
| 350 | ||
| 351 | private HtmlUtils() { | |
| 352 | super(); | |
| 353 | } | |
| 354 | } | |
Mutations | ||
| 80 |
1.1 2.2 3.3 4.4 5.5 6.6 7.7 8.8 9.9 |
|
| 100 |
1.1 2.2 3.3 |
|
| 104 |
1.1 2.2 3.3 4.4 |
|
| 122 |
1.1 2.2 3.3 |
|
| 126 |
1.1 2.2 3.3 4.4 5.5 |
|
| 151 |
1.1 2.2 3.3 4.4 5.5 |
|
| 153 |
1.1 2.2 3.3 |
|
| 158 |
1.1 2.2 3.3 4.4 5.5 |
|
| 200 |
1.1 2.2 3.3 |
|
| 202 |
1.1 2.2 3.3 4.4 5.5 6.6 |
|
| 203 |
1.1 2.2 3.3 |
|
| 208 |
1.1 2.2 3.3 |
|
| 211 |
1.1 2.2 3.3 |
|
| 215 |
1.1 2.2 3.3 |
|
| 229 |
1.1 2.2 3.3 4.4 5.5 6.6 7.7 8.8 |
|
| 246 |
1.1 2.2 3.3 4.4 5.5 |
|
| 248 |
1.1 2.2 3.3 |
|
| 249 |
1.1 2.2 3.3 |
|
| 252 |
1.1 2.2 3.3 4.4 5.5 |
|
| 286 |
1.1 2.2 3.3 |
|
| 287 |
1.1 2.2 |
|
| 290 |
1.1 2.2 3.3 |
|
| 319 |
1.1 2.2 3.3 |
|
| 320 |
1.1 2.2 |
|
| 323 |
1.1 2.2 3.3 4.4 5.5 6.6 |
|
| 327 |
1.1 |
|
| 332 |
1.1 2.2 3.3 |
|
| 337 |
1.1 2.2 3.3 |
|
| 345 |
1.1 2.2 3.3 |
|
| 348 |
1.1 2.2 3.3 |