我正在考虑使用正则表达式 (?<rule>(?<selector>[^{}]+){(?<style>[^{}]+)}) 从 CSS 中提取规则,然后在引用这些类的地方做简单的字符串替换;但有些 HTML 元素已经带有 style 属性,所以我也必须把这种情况考虑进去。
更新 – 2010年9月16日
我已经写出了一个简单的 CSS 内联工具,前提是你的 HTML 同时也是有效的 XML。它使用正则表达式提取 <style /> 元素中的所有样式规则,然后将 CSS 选择器转换为 XPath 表达式,并把样式以内联方式添加到匹配的元素上,位置在任何已有的内联样式之前。
using System.Collections.Generic;
using System.Text.RegularExpressions;
using System.Xml.Linq;
using System.Xml.XPath;

namespace CssInliner
{
    /// <summary>
    /// Moves the rules found in the <c>&lt;style&gt;</c> elements of an XHTML document
    /// into <c>style</c> attributes on the elements each rule selects.
    /// </summary>
    /// <remarks>
    /// The input is parsed with <see cref="XElement.Parse(string)"/>, so it must be
    /// well-formed XML. Selectors are translated to XPath by <c>CssToXpath.Transform</c>.
    /// </remarks>
    public class CssInliner
    {
        // Captures one "selector { declarations }" rule. Nested braces are not supported.
        private static readonly Regex _matchStyles = new Regex(
            "\\s*(?<rule>(?<selector>[^{}]+){(?<style>[^{}]+)})",
            RegexOptions.IgnoreCase | RegexOptions.CultureInvariant | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);

        /// <summary>The CSS rule matches collected from every <c>&lt;style&gt;</c> element.</summary>
        public List<Match> Styles { get; private set; }

        /// <summary>The serialized document after inlining, with the <c>&lt;style&gt;</c> elements removed.</summary>
        public string InlinedXhtml { get; private set; }

        private XElement XhtmlDocument { get; set; }

        /// <summary>
        /// Parses <paramref name="xhtml"/>, inlines every style rule and stores the
        /// result in <see cref="InlinedXhtml"/>.
        /// </summary>
        /// <param name="xhtml">Well-formed XHTML markup.</param>
        public CssInliner(string xhtml)
        {
            XhtmlDocument = ParseXhtml(xhtml);
            Styles = GetStyleMatches();

            foreach (var style in Styles)
            {
                // Skip an unusable match instead of returning: returning here would
                // leave InlinedXhtml null and the <style> elements in place.
                if (!style.Success)
                {
                    continue;
                }

                var cssSelector = style.Groups["selector"].Value.Trim();
                var xpathSelector = CssToXpath.Transform(cssSelector);
                var cssStyle = style.Groups["style"].Value.Trim();

                foreach (var element in XhtmlDocument.XPathSelectElements(xpathSelector))
                {
                    var inlineStyle = element.Attribute("style");

                    // The rule's declarations go first so that any pre-existing inline
                    // declarations come later in the attribute value.
                    var newInlineStyle = cssStyle + ";";
                    if (inlineStyle != null && !string.IsNullOrEmpty(inlineStyle.Value))
                    {
                        newInlineStyle += inlineStyle.Value;
                    }

                    // Collapse the ";;" produced when the rule already ended with ';',
                    // and squeeze whitespace runs left over from multi-line CSS.
                    element.SetAttributeValue(
                        "style",
                        newInlineStyle.Trim().NormalizeCharacter(';').NormalizeSpace());
                }
            }

            XhtmlDocument.Descendants("style").Remove();
            InlinedXhtml = XhtmlDocument.ToString();
        }

        /// <summary>Collects every CSS rule match from all <c>&lt;style&gt;</c> elements in the document.</summary>
        private List<Match> GetStyleMatches()
        {
            var styles = new List<Match>();

            foreach (var styleElement in XhtmlDocument.Descendants("style"))
            {
                foreach (Match match in _matchStyles.Matches(styleElement.Value))
                {
                    styles.Add(match);
                }
            }

            return styles;
        }

        /// <summary>Parses the markup into an <see cref="XElement"/> tree.</summary>
        private static XElement ParseXhtml(string xhtml)
        {
            return XElement.Parse(xhtml);
        }
    }
}
using System.Text.RegularExpressions;

namespace CssInliner
{
    /// <summary>
    /// Translates a CSS selector into an XPath expression by applying an ordered
    /// list of regular-expression rewrites.
    /// </summary>
    /// <remarks>
    /// References: http://ejohn.org/blog/xpath-css-selectors/ and
    /// http://code.google.com/p/css2xpath/source/browse/trunk/src/css2xpath.js
    /// </remarks>
    public static class CssToXpath
    {
        // Order matters: each rule rewrites the output of the previous one.
        // Built once so the regexes are not re-created on every Transform call.
        private static readonly RegexReplace[] TranslationRules =
        {
            // add @ for attribs
            new RegexReplace { Regex = new Regex(@"\[([^\]~\$\*\^\|\!]+)(=[^\]]+)?\]", RegexOptions.Multiline), Replace = @"[@$1$2]" },
            // multiple queries
            new RegexReplace { Regex = new Regex(@"\s*,\s*", RegexOptions.Multiline), Replace = @"|" },
            // strip whitespace around the combinators + ~ >
            new RegexReplace { Regex = new Regex(@"\s*(\+|~|>)\s*", RegexOptions.Multiline), Replace = @"$1" },
            // ~ general sibling
            new RegexReplace { Regex = new Regex(@"([a-zA-Z0-9_\-\*])~([a-zA-Z0-9_\-\*])", RegexOptions.Multiline), Replace = @"$1/following-sibling::$2" },
            // + adjacent sibling
            new RegexReplace { Regex = new Regex(@"([a-zA-Z0-9_\-\*])\+([a-zA-Z0-9_\-\*])", RegexOptions.Multiline), Replace = @"$1/following-sibling::*[1]/self::$2" },
            // > child
            new RegexReplace { Regex = new Regex(@"([a-zA-Z0-9_\-\*])>([a-zA-Z0-9_\-\*])", RegexOptions.Multiline), Replace = @"$1/$2" },
            // all unescaped stuff escaped
            new RegexReplace { Regex = new Regex(@"\[([^=]+)=([^'|""][^\]]*)\]", RegexOptions.Multiline), Replace = @"[$1='$2']" },
            // a bare #id or .class applies to any element
            new RegexReplace { Regex = new Regex(@"(^|[^a-zA-Z0-9_\-\*])(#|\.)([a-zA-Z0-9_\-]+)", RegexOptions.Multiline), Replace = @"$1*$2$3" },
            // all descendant or self to //
            new RegexReplace { Regex = new Regex(@"([\>\+\|\~\,\s])([a-zA-Z\*]+)", RegexOptions.Multiline), Replace = @"$1//$2" },
            new RegexReplace { Regex = new Regex(@"\s+\/\/", RegexOptions.Multiline), Replace = @"//" },
            // :first-child
            new RegexReplace { Regex = new Regex(@"([a-zA-Z0-9_\-\*]+):first-child", RegexOptions.Multiline), Replace = @"*[1]/self::$1" },
            // :last-child
            new RegexReplace { Regex = new Regex(@"([a-zA-Z0-9_\-\*]+):last-child", RegexOptions.Multiline), Replace = @"$1[not(following-sibling::*)]" },
            // :only-child
            new RegexReplace { Regex = new Regex(@"([a-zA-Z0-9_\-\*]+):only-child", RegexOptions.Multiline), Replace = @"*[last()=1]/self::$1" },
            // :empty
            new RegexReplace { Regex = new Regex(@"([a-zA-Z0-9_\-\*]+):empty", RegexOptions.Multiline), Replace = @"$1[not(*) and not(normalize-space())]" },
            // |= attrib
            new RegexReplace { Regex = new Regex(@"\[([a-zA-Z0-9_\-]+)\|=([^\]]+)\]", RegexOptions.Multiline), Replace = @"[@$1=$2 or starts-with(@$1,concat($2,'-'))]" },
            // *= attrib
            new RegexReplace { Regex = new Regex(@"\[([a-zA-Z0-9_\-]+)\*=([^\]]+)\]", RegexOptions.Multiline), Replace = @"[contains(@$1,$2)]" },
            // ~= attrib
            new RegexReplace { Regex = new Regex(@"\[([a-zA-Z0-9_\-]+)~=([^\]]+)\]", RegexOptions.Multiline), Replace = @"[contains(concat(' ',normalize-space(@$1),' '),concat(' ',$2,' '))]" },
            // ^= attrib
            new RegexReplace { Regex = new Regex(@"\[([a-zA-Z0-9_\-]+)\^=([^\]]+)\]", RegexOptions.Multiline), Replace = @"[starts-with(@$1,$2)]" },
            // != attrib
            new RegexReplace { Regex = new Regex(@"\[([a-zA-Z0-9_\-]+)\!=([^\]]+)\]", RegexOptions.Multiline), Replace = @"[not(@$1) or @$1!=$2]" },
            // ids
            new RegexReplace { Regex = new Regex(@"#([a-zA-Z0-9_\-]+)", RegexOptions.Multiline), Replace = @"[@id='$1']" },
            // classes
            new RegexReplace { Regex = new Regex(@"\.([a-zA-Z0-9_\-]+)", RegexOptions.Multiline), Replace = @"[contains(concat(' ',normalize-space(@class),' '),' $1 ')]" },
            // normalize multiple filters
            new RegexReplace { Regex = new Regex(@"\]\[([^\]]+)", RegexOptions.Multiline), Replace = @" and ($1)" },
        };

        /// <summary>
        /// Converts a CSS selector to an XPath expression rooted at <c>//</c>.
        /// </summary>
        /// <param name="css">The CSS selector, for example <c>div#test .note</c>.</param>
        /// <returns>The selector rewritten by every translation rule, prefixed with <c>//</c>.</returns>
        public static string Transform(string css)
        {
            foreach (var rule in TranslationRules)
            {
                css = rule.Regex.Replace(css, rule.Replace);
            }

            return "//" + css;
        }
    }

    /// <summary>Pairs a pattern with the replacement string applied to its matches.</summary>
    struct RegexReplace
    {
        public Regex Regex;
        public string Replace;
    }
}
/// <summary>
/// Checks that representative CSS selectors translate to the expected XPath.
/// </summary>
[TestMethod]
public void TestCssToXpathRules()
{
    var translations = new Dictionary<string, string>
    {
        { "*", "//*" },
        { "p", "//p" },
        { "p > *", "//p/*" },
        { "#foo", "//*[@id='foo']" },
        { "*[title]", "//*[@title]" },
        // Class selectors test for a whole, space-delimited token in @class.
        { ".bar", "//*[contains(concat(' ',normalize-space(@class),' '),' bar ')]" },
        { "div#test .note span:first-child", "//div[@id='test']//*[contains(concat(' ',normalize-space(@class),' '),' note ')]//*[1]/self::span" }
    };

    foreach (var translation in translations)
    {
        var expected = translation.Value;
        var result = CssInliner.CssToXpath.Transform(translation.Key);
        Assert.AreEqual(expected, result);
    }
}

/// <summary>
/// Checks that a class rule written across several lines is inlined as a single
/// <c>style</c> attribute and that the <c>&lt;style&gt;</c> element is removed.
/// </summary>
[TestMethod]
public void HtmlWithMultiLineClassStyleReturnsInline()
{
    // Both documents go through XElement.Parse(...).ToString() so that formatting
    // differences in the literals do not affect the comparison.
    #region var html = ...
    var html = XElement.Parse(@"<html>
    <head>
        <title>Hello,World Page!</title>
        <style>
            .redClass {
                background: red;
                color: purple;
            }
        </style>
    </head>
    <body>
        <div class=""redClass"">Hello,World!</div>
    </body>
</html>").ToString();
    #endregion

    #region const string expected ...
    var expected = XElement.Parse(@"<html>
    <head>
        <title>Hello,World Page!</title>
    </head>
    <body>
        <div class=""redClass"" style=""background: red; color: purple;"">Hello,World!</div>
    </body>
</html>").ToString();
    #endregion

    var result = new CssInliner.CssInliner(html);

    Assert.AreEqual(expected, result.InlinedXhtml);
}
// Matches any run of two or more whitespace characters.
private static readonly Regex NormalizeSpaceRegex = new Regex(@"\s{2,}", RegexOptions.None);

/// <summary>
/// Replaces every run of two or more whitespace characters with a single space.
/// </summary>
/// <param name="data">The text to normalize.</param>
/// <returns><paramref name="data"/> with whitespace runs collapsed.</returns>
public static string NormalizeSpace(this string data)
{
    return NormalizeSpaceRegex.Replace(data, @" ");
}

/// <summary>
/// Replaces every run of two or more consecutive occurrences of
/// <paramref name="character"/> with a single occurrence.
/// </summary>
/// <param name="data">The text to normalize.</param>
/// <param name="character">The character whose repeats are collapsed.</param>
/// <returns><paramref name="data"/> with repeated characters collapsed.</returns>
public static string NormalizeCharacter(this string data, char character)
{
    // Escape the character so that regex metacharacters such as '.' or '*'
    // are matched literally rather than interpreted as pattern syntax.
    var normalizeCharacterRegex = new Regex(Regex.Escape(character.ToString()) + "{2,}", RegexOptions.None);
    return normalizeCharacterRegex.Replace(data, character.ToString());
}
也可以使用 HTML Agility Pack。它支持 XPath 查询,还提供了一个与 .NET 标准 XML 接口类似的 LINQ 接口,因此用它来替换应该相当简单。