前端之家收集整理的这篇文章主要介绍了 Nutch 中的 nutch-site.xml 配置，前端之家小编觉得挺不错的，现在分享给大家，也给大家做个参考。
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
  <!-- The value is a relative path, so per the description it is looked up
       on the classpath rather than used as is. -->
  <name>plugin.folders</name>
  <value>./src/plugin</value>
  <description>Directories where nutch plugins are located. Each
  element may be a relative or absolute path. If absolute, it is used
  as is. If relative, it is searched for on the classpath.</description>
</property>
<property>
  <!-- Agent name for the crawler. Per the Nutch default configuration docs
       this is the name sent in the HTTP User-Agent header; verify against
       the Nutch version in use.
       NOTE(review): the value looks like a tutorial placeholder; confirm
       the real agent name before crawling. -->
  <name>http.agent.name</name>
  <value>YourNutchSpider</value>
</property>
<property>
  <!-- ja-jp is listed first, followed by English variants; plain "en" is
       weighted q=0.7 and the wildcard q=0.3. -->
  <name>http.accept.language</name>
  <value>ja-jp,en-us,en-gb,en;q=0.7,*;q=0.3</value>
  <description>Value of the Accept-Language request header field.
  This allows selecting a non-English language as the default one to retrieve.
  It is a useful setting for search engines built for a certain national group.</description>
</property>
<property>
  <!-- Java class names are case-sensitive: the Gora SQL store class is
       SqlStore (capital S). The lower-case spelling "sqlStore" cannot be
       resolved by the class loader. -->
  <name>storage.data.store.class</name>
  <value>org.apache.gora.sql.store.SqlStore</value>
  <description>The Gora DataStore class for storing and retrieving data.
  Currently the following stores are available: ...</description>
</property>
<property>
  <!-- Fallback only: per the description, this applies when no other
       encoding information is available. -->
  <name>parser.character.encoding.default</name>
  <value>utf-8</value>
  <description>The character encoding to fall back to when no other information
  is available.</description>
</property>
<property>
  <!-- NOTE(review): "*" presumably acts as a match-all batch id rather than
       a literal id; confirm against the Nutch version in use. -->
  <name>generate.batch.id</name>
  <value>*</value>
</property>
</configuration>