【笔记】【LINQ编程技术内幕】第十八章 从XML中提取数据

样例XML

<?xml version="1.0" encoding="utf-8" ?>
<!--
  Sample data for the pet queries (presumably the PetCemetary.xml file the C# snippets load).
  A single <pets> root holds nine <pet> records, each with the same eight child elements.
  Pets 5, 7, 8 and 9 have <endYear></endYear> and <causeOfDeath></causeOfDeath> written as
  start/end tag pairs with no text, not as self-closing tags.
  No <species> element carries a genus attribute in this data.
-->
<pets>
  <pet>
    <id>1</id>
    <name>Duke</name>
    <species>Great Dane</species>
    <sex>Male</sex>
    <startYear>1968</startYear>
    <endYear>1970</endYear>
    <causeOfDeath>Suicide</causeOfDeath>
    <specialQuality>Big and goofy</specialQuality>
  </pet>
  <pet>
    <id>2</id>
    <name>Dog</name>
    <species>Some Kind of Cat</species>
    <sex>Female</sex>
    <startYear>1972</startYear>
    <endYear>1974</endYear>
    <causeOfDeath>Car</causeOfDeath>
    <specialQuality>Best mouser</specialQuality>
  </pet>
  <pet>
    <id>3</id>
    <name>Sam</name>
    <species>Labrador</species>
    <sex>Female</sex>
    <startYear>1973</startYear>
    <endYear>1980</endYear>
    <causeOfDeath>Old Age</causeOfDeath>
    <specialQuality>Great hunting dog</specialQuality>
  </pet>
  <pet>
    <id>4</id>
    <name>Hogan</name>
    <species>Yellow Lab Mix</species>
    <sex>Male</sex>
    <startYear>1994</startYear>
    <endYear>2004</endYear>
    <causeOfDeath>Seizure</causeOfDeath>
    <specialQuality>A very good dog</specialQuality>
  </pet>
  <pet>
    <id>5</id>
    <name>Leda</name>
    <species>Chocolate Labradour</species>
    <sex>Female</sex>
    <startYear>2004</startYear>
    <endYear></endYear>
    <causeOfDeath></causeOfDeath>
    <specialQuality>Thumper</specialQuality>
  </pet>
  <pet>
    <id>6</id>
    <name>Po</name>
    <species>Toy Poodle</species>
    <sex>Female</sex>
    <startYear>2003</startYear>
    <endYear>2004</endYear>
    <causeOfDeath>Lethal Injection</causeOfDeath>
    <specialQuality>Mental</specialQuality>
  </pet>
  <pet>
    <id>7</id>
    <name>Big Mama</name>
    <species>Tabby</species>
    <sex>Female</sex>
    <startYear>1998</startYear>
    <endYear></endYear>
    <causeOfDeath></causeOfDeath>
    <specialQuality>Quarterback</specialQuality>
  </pet>
  <pet>
    <id>8</id>
    <name>Ruby</name>
    <species>Rotweiller</species>
    <sex>Female</sex>
    <startYear>1997</startYear>
    <endYear></endYear>
    <causeOfDeath></causeOfDeath>
    <specialQuality>Big baby</specialQuality>
  </pet>
  <pet>
    <id>9</id>
    <name>Nala</name>
    <species>Maine Coon</species>
    <sex>Female</sex>
    <startYear>2007</startYear>
    <endYear></endYear>
    <causeOfDeath></causeOfDeath>
    <specialQuality>El Freako</specialQuality>
  </pet>
</pets>

加载XML文档

对于LINQ to XML,需要先加载XML文档,把数据读入内存。可以用System.Xml.Linq.XDocument或System.Xml.Linq.XElement的静态Load方法来加载XML文档,也可以用XElement.Parse将字符串形式的XML解析成XElement对象。

  • System.Xml.Linq.XDocument.Load(xml_file_name) 从根节点开始加载整个XML文档
  • System.Xml.Linq.XElement.Load(xml_file_name) 该方法也会加载根节点,不过返回的XElement本身就是根元素,查询时无需再显式选取根节点
    XDocument.Load和XElement.Load都有一些重载版本,可以接收字符串文件名、TextReader或XmlReader。其中,有个版本(Load(string, LoadOptions))从文件加载XML,LoadOptions可以保留空白、设置基统一资源标识符(URI)或从XmlReader中获取行信息。LoadOptions枚举的选项有None、PreserveWhitespace、SetBaseUri和SetLineInfo。

查询XML

使用XDocument

class Program
{
	/// <summary>
	/// Loads the pet file as an <c>XDocument</c> and prints the name of every pet.
	/// </summary>
	static void Main(string[] args)
	{
		XDocument document = XDocument.Load("..\\..\\PetCemetary.xml");

		// An XDocument sits above the root element, so the <pets> root has to be
		// selected explicitly before its <pet> children can be reached.
		XElement[] petElements = document.Elements("pets").Elements("pet").ToArray();

		foreach (XElement petElement in petElements)
		{
			Console.WriteLine(petElement.Element("name").Value);
		}
	}
}

使用XElement

class Program
{
	/// <summary>
	/// Loads the pet file as an <c>XElement</c> and prints the name of every pet.
	/// </summary>
	static void Main(string[] args)
	{
		XElement root = XElement.Load("..\\..\\PetCemetary.xml");

		// Compared with the XDocument version there is one level less to navigate:
		// the loaded XElement already is the <pets> root, so no Elements("pets") call.
		XElement[] petElements = root.Elements("pet").ToArray();

		for (int i = 0; i < petElements.Length; i++)
		{
			Console.WriteLine(petElements[i].Element("name").Value);
		}
	}
}

管理属性

class Program
{
	/// <summary>
	/// Prints the name of every pet whose <c>species</c> element has a
	/// <c>genus</c> attribute equal to "Feline".
	/// </summary>
	static void Main(string[] args)
	{
		XElement xml = XElement.Load("..\\..\\PetCemetary.xml");

		// The temporary range variable genus is introduced by the let clause.
		// Attribute() returns null when the attribute is absent, so it must be
		// checked before reading Value; otherwise any <species> without a genus
		// attribute aborts the whole query with a NullReferenceException.
		var pets = from pet in xml.Elements("pet")
				   let genus = pet.Element("species").Attribute("genus")
				   where genus != null && genus.Value == "Feline"
				   select pet;

		Array.ForEach(pets.ToArray(), p => Console.WriteLine(p.Element("name").Value));
	}
}

添加属性

class Program
{
	/// <summary>
	/// Finds every pet whose <c>species</c> element has no <c>genus</c> attribute,
	/// prints its name, adds <c>genus="Dog"</c> to it and saves the file in place.
	/// </summary>
	static void Main(string[] args)
	{
		string filename = "..\\..\\PetCemetary.xml";

		XElement xml = XElement.Load(filename);

		// Materialize the result once. The query is deferred, so using it twice
		// would run it twice, and the loop below changes the very attribute the
		// where clause tests; iterating a live query while mutating the tree it
		// reads is fragile.
		XElement[] pets = (from pet in xml.Elements("pet")
						   let genus = pet.Element("species").Attribute("genus")
						   where genus == null
						   select pet).ToArray();

		Array.ForEach(pets, p => Console.WriteLine(p.Element("name").Value));

		foreach (var pet in pets)
		{
			pet.Element("species").Add(new XAttribute("genus", "Dog"));
		}

		// Overwrites the file that was loaded.
		xml.Save(filename);
	}
}

删除属性

class Program
{
	/// <summary>
	/// Prints every pet named "Ruby", removes the <c>genus</c> attribute from its
	/// <c>species</c> element when one is present, and saves the file in place.
	/// </summary>
	static void Main(string[] args)
	{
		string filename = "..\\..\\PetCemetary.xml";

		XElement xml = XElement.Load(filename);

		// Materialize once so the deferred query is not re-run for the second pass
		// and the tree is not modified while a live query is being enumerated.
		XElement[] pets = (from pet in xml.Elements("pet")
						   where pet.Element("name").Value == "Ruby"
						   select pet).ToArray();

		Array.ForEach(pets, p => Console.WriteLine(p.Element("name").Value));

		foreach (var pet in pets)
		{
			// Attribute() returns null when there is no genus attribute (for example
			// when the file has never had one added); removing nothing is a no-op
			// instead of a NullReferenceException.
			XAttribute genus = pet.Element("species").Attribute("genus");
			if (genus != null)
			{
				genus.Remove();
			}
		}

		// Overwrites the file that was loaded.
		xml.Save(filename);
	}
}

从字符串中加载XML

class Program
{
	/// <summary>
	/// Builds an XML fragment in memory, parses it with <c>XElement.Parse</c>
	/// and prints the name of every pet it contains.
	/// </summary>
	static void Main(string[] args)
	{
		// The pieces are joined without separators, so the parsed text is one long line.
		string markup = string.Concat(new string[]
		{
			"<pets>",
			"  <pet>",
			"    <id>2</id>",
			"    <name>Dog</name>",
			"    <species>Some Kind of Cat</species>",
			"    <sex>Female</sex>",
			"    <startYear>1972</startYear>",
			"    <endYear>1974</endYear>",
			"    <causeOfDeath>Car</causeOfDeath>",
			"    <specialQuality>Best mouser</specialQuality>",
			"  </pet>",
			"</pets>"
		});

		XElement root = XElement.Parse(markup);
		XElement[] petElements = root.Elements("pet").ToArray();

		foreach (XElement petElement in petElements)
		{
			Console.WriteLine(petElement.Element("name").Value);
		}
	}
}

处理缺失的数据

有些数据可能并没有出现在XML中。比如说,XML中允许出现没有数据的元素。

class Program
{
	/// <summary>
	/// Prints each pet's name, the year it joined the family and the year it left,
	/// reporting 0 as the end year when that value is missing.
	/// </summary>
	static void Main(string[] args)
	{
		XElement elem = XElement.Load(@"..\..\PetCemetary.xml");

		// Not every pet has an end year: the element may be absent, or present with
		// no text. XElement.IsEmpty is true only for a self-closing tag, so
		// <endYear></endYear> is not "empty" by that test and casting it to int
		// would throw a FormatException. Test the text content instead.
		var pets = from pet in elem.Elements("pet")
				   let endYear = pet.Element("endYear")
				   select new
				   {
					   // pet.Name would be the element's tag name ("pet"), not the pet's name.
					   Name = (string)pet.Element("name"),
					   StartYear = (int)pet.Element("startYear"),
					   EndYear = (endYear == null || endYear.Value.Trim().Length == 0)
						   ? 0
						   : (int)endYear
				   };

		Array.ForEach(pets.ToArray(), p =>
		{
			Console.WriteLine("Name: {0}", p.Name);
			Console.WriteLine("Entered family: {0}", p.StartYear);
			Console.WriteLine("Left family: {0}", p.EndYear);
		});
	}
}

使用查询表达式和XML数据

样例XML数据

<?xml version="1.0" encoding="utf-8" ?>
<!--
  Sample stock quotes (presumably the Stocks.xml file the C# snippets load).
  Every element is bound to the http://www.stock_quotes.com namespace through the sq prefix.
  The Change, Low and High attributes on sq:Price are unprefixed and therefore in no
  namespace, so they are looked up by their plain names.
-->
<sq:Stocks xmlns:sq="http://www.stock_quotes.com">
  <sq:Stock>
    <sq:Symbol>MSFT</sq:Symbol>
    <sq:Price Change="0.6" Low="42.1" High="51.0">56.0</sq:Price>
  </sq:Stock> 
  <sq:Stock>
    <sq:Symbol>MVK</sq:Symbol>
    <sq:Price Change="-3.2" Low="22.8" High="32.4">25.5</sq:Price>
  </sq:Stock>
  <sq:Stock>
    <sq:Symbol>GOOG</sq:Symbol>
    <sq:Price Change="8.0" Low="24.4" High="34.5">32.0</sq:Price>
  </sq:Stock> 
  <sq:Stock>
    <sq:Symbol>VFINX</sq:Symbol>
    <sq:Price Change="8.0" Low="24.4" High="34.5">32.0</sq:Price>
  </sq:Stock>
  <sq:Stock>
    <sq:Symbol>HDPMX</sq:Symbol>
    <sq:Price Change="8.0" Low="24.4" High="34.5">32.0</sq:Price>
  </sq:Stock>
</sq:Stocks>

使用命名空间

上面的样例XML与之前的XML相比,在结构上有一个重要的区别:它添加了一个命名空间(即xmlns属性)。下面的例子中,定义了一个XNamespace对象,然后在查询XML文件时,将其作为所有使用了这个命名空间的元素的前缀。

class Program
{
	/// <summary>
	/// Prints the symbol of every stock in the namespaced stocks file.
	/// </summary>
	static void Main(string[] args)
	{
		const string filename = "..\\..\\Stocks.xml";
		XNamespace sq = "http://www.stock_quotes.com";
		XElement root = XElement.Load(filename);

		// Element names live in the sq namespace, so every lookup uses sq + "local name".
		var symbols = root.Elements(sq + "Stock")
						  .Select(stock => new { Name = stock.Element(sq + "Symbol").Value })
						  .ToArray();

		foreach (var entry in symbols)
		{
			Console.WriteLine(entry.Name);
		}
	}
}

嵌套查询

class Program
{
	/// <summary>
	/// Prints the symbol of every stock that contains at least one
	/// <c>sq:Symbol</c> child element.
	/// </summary>
	static void Main(string[] args)
	{
		const string filename = "..\\..\\Stocks.xml";
		XNamespace sq = "http://www.stock_quotes.com";
		XElement root = XElement.Load(filename);

		// The inner sequence acts as the nested query: a stock qualifies only
		// when its collection of Symbol children is non-empty.
		var symbols = root.Elements(sq + "Stock")
						  .Where(stock => stock.Elements(sq + "Symbol").Any())
						  .Select(stock => new { Name = stock.Element(sq + "Symbol").Value })
						  .ToArray();

		foreach (var entry in symbols)
		{
			Console.WriteLine(entry.Name);
		}
	}
}

使用Where子句进行筛选

class Program
{
	/// <summary>
	/// Prints the symbol of every stock that has a price whose
	/// <c>Change</c> attribute is negative.
	/// </summary>
	static void Main(string[] args)
	{
		const string filename = "..\\..\\Stocks.xml";
		XNamespace sq = "http://www.stock_quotes.com";
		XElement root = XElement.Load(filename);

		// Change is an unprefixed attribute, so it is read by its plain name.
		XElement[] losers = root.Elements(sq + "Stock")
								.Where(stock => stock.Elements(sq + "Price")
													 .Any(price => (decimal)price.Attribute("Change") < 0))
								.ToArray();

		foreach (XElement stock in losers)
		{
			Console.WriteLine(stock.Element(sq + "Symbol").Value);
		}
	}
}

根据上下文查找元素

class Program
{
	/// <summary>
	/// Parses an in-memory stocks document and prints every symbol whose
	/// following sibling element has a <c>Change</c> attribute below 1.
	/// </summary>
	static void Main(string[] args)
	{
		XElement elem = XElement.Parse(
		   "<?xml version=\"1.0\" encoding=\"utf-8\" ?>" +
		   "<sq:Stocks xmlns:sq=\"http://www.stock_quotes.com\">" +
		   "  <sq:Stock>" +
		   "    <sq:Symbol>MSFT</sq:Symbol>" +
		   "    <sq:Price Change=\"0.6\" Low=\"42.1\" High=\"51.0\">56.0</sq:Price>" +
		   "  </sq:Stock>" +
		   "  <sq:Stock>" +
		   "    <sq:Symbol>MVK</sq:Symbol>" +
		   "    <sq:Price Change=\"-3.2\" Low=\"22.8\" High=\"32.4\">25.5</sq:Price>" +
		   "  </sq:Stock>" +
		   "  <sq:Stock>" +
		   "    <sq:Symbol>GOOG</sq:Symbol>" +
		   "    <sq:Price Change=\"8.0\" Low=\"24.4\" High=\"34.5\">32.0</sq:Price>" +
		   "  </sq:Stock>" +
		   "  <sq:Stock>" +
		   "    <sq:Symbol>VFINX</sq:Symbol>" +
		   "    <sq:Price Change=\"8.0\" Low=\"24.4\" High=\"34.5\">32.0</sq:Price>" +
		   "  </sq:Stock>" +
		   "  <sq:Stock>" +
		   "    <sq:Symbol>HDPMX</sq:Symbol>" +
		   "    <sq:Price Change=\"8.0\" Low=\"24.4\" High=\"34.5\">32.0</sq:Price>" +
		   "  </sq:Stock>" +
		   "</sq:Stocks>");

		XNamespace sq = "http://www.stock_quotes.com";

		// The price is located by position (the first element after the symbol),
		// not by name. FirstOrDefault yields null when a symbol has no following
		// sibling, so that case is filtered out instead of crashing the query.
		var contextStock =
		  from symbol in elem.Elements(sq + "Stock").Elements(sq + "Symbol")
		  let price = symbol.ElementsAfterSelf().FirstOrDefault()
		  where price != null && (decimal)(price.Attribute("Change")) < 1M
		  select symbol;

		Array.ForEach(contextStock.ToArray(), o => Console.WriteLine(o.Value));
	}
}

对XML查询进行排序

class Program
{
	/// <summary>
	/// Parses an in-memory stocks document and prints, ordered by ascending
	/// <c>Change</c>, every symbol whose following sibling element has a
	/// <c>Change</c> attribute below 1.
	/// </summary>
	static void Main(string[] args)
	{
		XElement elem = XElement.Parse(
		   "<?xml version=\"1.0\" encoding=\"utf-8\" ?>" +
		   "<sq:Stocks xmlns:sq=\"http://www.stock_quotes.com\">" +
		   "  <sq:Stock>" +
		   "    <sq:Symbol>MSFT</sq:Symbol>" +
		   "    <sq:Price Change=\"0.6\" Low=\"42.1\" High=\"51.0\">56.0</sq:Price>" +
		   "  </sq:Stock>" +
		   "  <sq:Stock>" +
		   "    <sq:Symbol>MVK</sq:Symbol>" +
		   "    <sq:Price Change=\"-3.2\" Low=\"22.8\" High=\"32.4\">25.5</sq:Price>" +
		   "  </sq:Stock>" +
		   "  <sq:Stock>" +
		   "    <sq:Symbol>GOOG</sq:Symbol>" +
		   "    <sq:Price Change=\"8.0\" Low=\"24.4\" High=\"34.5\">32.0</sq:Price>" +
		   "  </sq:Stock>" +
		   "  <sq:Stock>" +
		   "    <sq:Symbol>VFINX</sq:Symbol>" +
		   "    <sq:Price Change=\"8.0\" Low=\"24.4\" High=\"34.5\">32.0</sq:Price>" +
		   "  </sq:Stock>" +
		   "  <sq:Stock>" +
		   "    <sq:Symbol>HDPMX</sq:Symbol>" +
		   "    <sq:Price Change=\"8.0\" Low=\"24.4\" High=\"34.5\">32.0</sq:Price>" +
		   "  </sq:Stock>" +
		   "</sq:Stocks>");

		XNamespace sq = "http://www.stock_quotes.com";

		// FirstOrDefault yields null when a symbol has no following sibling; the
		// null test keeps such symbols out of both the filter and the sort key.
		var contextStock =
		  from symbol in elem.Elements(sq + "Stock").Elements(sq + "Symbol")
		  let price = symbol.ElementsAfterSelf().FirstOrDefault()
		  where price != null && (decimal)(price.Attribute("Change")) < 1M
		  orderby (decimal)price.Attribute("Change")
		  select symbol;

		Array.ForEach(contextStock.ToArray(), o => Console.WriteLine(o.Value));
	}
}

通过let计算中间值

class Program
{
	/// <summary>
	/// Parses an in-memory stocks document, computes each stock's High minus Low
	/// spread and prints symbol/spread pairs from the widest spread to the narrowest.
	/// </summary>
	static void Main(string[] args)
	{
		XElement root = XElement.Parse(
		   "<?xml version=\"1.0\" encoding=\"utf-8\" ?>" +
		   "<sq:Stocks xmlns:sq=\"http://www.stock_quotes.com\">" +
		   "  <sq:Stock>" +
		   "    <sq:Symbol>MSFT</sq:Symbol>" +
		   "    <sq:Price Change=\"0.6\" Low=\"42.1\" High=\"51.0\">56.0</sq:Price>" +
		   "  </sq:Stock>" +
		   "  <sq:Stock>" +
		   "    <sq:Symbol>MVK</sq:Symbol>" +
		   "    <sq:Price Change=\"-3.2\" Low=\"22.8\" High=\"32.4\">25.5</sq:Price>" +
		   "  </sq:Stock>" +
		   "  <sq:Stock>" +
		   "    <sq:Symbol>GOOG</sq:Symbol>" +
		   "    <sq:Price Change=\"8.0\" Low=\"24.4\" High=\"34.5\">32.0</sq:Price>" +
		   "  </sq:Stock>" +
		   "  <sq:Stock>" +
		   "    <sq:Symbol>VFINX</sq:Symbol>" +
		   "    <sq:Price Change=\"8.0\" Low=\"24.4\" High=\"34.5\">32.0</sq:Price>" +
		   "  </sq:Stock>" +
		   "  <sq:Stock>" +
		   "    <sq:Symbol>HDPMX</sq:Symbol>" +
		   "    <sq:Price Change=\"8.0\" Low=\"24.4\" High=\"34.5\">32.0</sq:Price>" +
		   "  </sq:Stock>" +
		   "</sq:Stocks>");

		XNamespace sq = "http://www.stock_quotes.com";

		// Method-syntax form of a let clause: carry the stock together with its
		// computed spread, sort on the spread, then shape the final result.
		var rankedSpreads = root.Elements(sq + "Stock")
			.Select(stock => new
			{
				stock,
				spread = (decimal)stock.Element(sq + "Price").Attribute("High") -
						 (decimal)stock.Element(sq + "Price").Attribute("Low")
			})
			.OrderByDescending(pair => pair.spread)
			.Select(pair => new { Symbol = pair.stock.Element(sq + "Symbol").Value, Spread = pair.spread })
			.ToArray();

		foreach (var entry in rankedSpreads)
		{
			Console.WriteLine(entry);
		}
	}
}

节点批注

class Program
{
	/// <summary>
	/// Attaches a quote-lookup URL to every stock element as an annotation
	/// and prints the annotation that was stored.
	/// </summary>
	static void Main(string[] args)
	{
		const string filename = "..\\..\\Stocks.xml";
		const string yahooQuery = "http://download.finance.yahoo.com/d/?s={0}&f=ncbh";

		XNamespace sq = "http://www.stock_quotes.com";
		XElement root = XElement.Load(filename);

		foreach (XElement stock in root.Elements(sq + "Stock"))
		{
			string symbol = stock.Element(sq + "Symbol").Value;
			string quoteUrl = string.Format(yahooQuery, symbol);

			// The annotation is kept on the in-memory element; nothing is saved here.
			stock.AddAnnotation(quoteUrl);

			// Asking for object returns the first annotation of any type.
			object stored = stock.Annotation<object>();
			Console.WriteLine(stored);
		}
	}
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

zhy29563

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值