【IT168技术文档】
最近遇到一个问题:需要提取 Html 文件中的 Image 标签(<img * />),并进一步提取 Image 标签里的各个属性值。
我的实现如下:
// Matches an <img> element in any of its three common spellings:
// <img ...>, <img ... /> and <img ...></img>. Group 1 is the whole tag.
private static readonly Regex ImageTagRegex = new Regex(
    @"(<img\b[^>]*>(?:\s*</img>)?)",
    RegexOptions.Multiline | RegexOptions.Compiled | RegexOptions.IgnoreCase);

// Matches name="value" or name='value'. Group 1 is the attribute name,
// group 2 the double-quoted value, group 3 the single-quoted value.
// Each quoting style only stops at its own closing quote, so a value such
// as alt="it's" is captured in full, and empty values are accepted.
private static readonly Regex AttributeRegex = new Regex(
    @"([a-zA-Z][\w:.\-]*)\s*=\s*(?:""([^""]*)""|'([^']*)')",
    RegexOptions.Compiled);

// Matches one "property: value" declaration inside a style attribute.
private static readonly Regex StyleDeclarationRegex = new Regex(
    @"([a-zA-Z\-]+)\s*:\s*([^;]*);*",
    RegexOptions.Compiled);

/// <summary>
/// Demo entry point: scans a sample HTML fragment for image tags and passes
/// each one to <see cref="ParseImageTag"/>.
/// </summary>
private void ParseImageFromHtmlConent()
{
    // Sample HTML content to parse.
    string html = "sdfasdfs<br />sdf<br /><img src=\"http://localhost:49573/images/6575636a-9bff-4a68-a1c7-f8ef4f78a2a2.flv\" alt=\"sdf\" style=\"width:150px;height:150px;border-color:red\" ></img><br />sdf<img src=\"http://localhost:49573/images/6575636a-9bff-4a68-a1c7-f8ef4f78a2a2.flv\" alt=\"sdf\" style=\"width:150px;height:150px\" />fdfdf";

    // Replace is used as a "visit every match" loop; the evaluator returns
    // each tag unchanged unless ParseImageTag is customised to rewrite it.
    html = ImageTagRegex.Replace(html, new MatchEvaluator(ParseImageTag));
}

/// <summary>
/// Match evaluator invoked for every image tag found: extracts the tag's
/// attributes and the individual declarations of its style attribute.
/// </summary>
/// <param name="m">The regex match whose group 1 holds the full image tag.</param>
/// <returns>
/// The replacement text for the matched tag. The tag is returned unchanged;
/// a <see cref="MatchEvaluator"/> must return a string, so a void method
/// cannot be used here.
/// </returns>
private string ParseImageTag(Match m)
{
    string img = m.Groups[1].Value;
    Dictionary<string, string> dic = ParseImagePropertiesTages(img);

    // Not every image tag has a style attribute; the indexer would throw
    // KeyNotFoundException, so look it up defensively (style stays null).
    string style;
    dic.TryGetValue("style", out style);
    Dictionary<string, string> dicStyle = ParseTagesStyle(style);

    // Process dic / dicStyle according to your own requirements here.
    // ........................

    return m.Value;
}

/// <summary>
/// Extracts the quoted attributes of an image tag.
/// </summary>
/// <param name="img">The text of the tag, e.g. <c>&lt;img src="a.png" /&gt;</c>.</param>
/// <returns>
/// A dictionary mapping each lower-cased attribute name to its trimmed value.
/// When an attribute is repeated the last occurrence wins. Returns an empty
/// dictionary for null or empty input. Unquoted values are not recognised.
/// </returns>
private Dictionary<string, string> ParseImagePropertiesTages(string img)
{
    Dictionary<string, string> dic = new Dictionary<string, string>();
    if (string.IsNullOrEmpty(img))
    {
        return dic;
    }

    foreach (Match m in AttributeRegex.Matches(img))
    {
        // Lower-case with the invariant culture so keys do not depend on the
        // current locale (e.g. "WIDTH" under a Turkish culture).
        string key = m.Groups[1].Value.Trim().ToLowerInvariant();

        // Exactly one of the two value groups participates in a match.
        Group quoted = m.Groups[2].Success ? m.Groups[2] : m.Groups[3];

        // Indexer assignment overwrites an earlier duplicate.
        dic[key] = quoted.Value.Trim();
    }

    return dic;
}

/// <summary>
/// Splits the content of a style attribute into its individual declarations.
/// </summary>
/// <param name="style">Style text such as <c>width:150px;height:150px</c>; may be null.</param>
/// <returns>
/// A dictionary mapping each lower-cased property name to its trimmed value.
/// When a property is repeated the last occurrence wins. Returns an empty
/// dictionary for null or empty input.
/// </returns>
private Dictionary<string, string> ParseTagesStyle(string style)
{
    Dictionary<string, string> dic = new Dictionary<string, string>();
    if (string.IsNullOrEmpty(style))
    {
        return dic;
    }

    foreach (Match m in StyleDeclarationRegex.Matches(style))
    {
        string key = m.Groups[1].Value.Trim().ToLowerInvariant();
        dic[key] = m.Groups[2].Value.Trim();
    }

    return dic;
}