在官方提供的Pipeline中没有MySQL的实现。今天在做数据抓取时需要把数据都保存到MySQL中。
首先自定义一个类实现Pipeline:
package com.lacerta.weiwei.news; import com.lacerta.util.JdbcUtil; import us.codecraft.webmagic.ResultItems; import us.codecraft.webmagic.Task; import us.codecraft.webmagic.pipeline.Pipeline; public class ZiXunMysqLPipeline implements Pipeline { @Override public void process(ResultItems resultItems,Task task) { String source_url = resultItems.getRequest().getUrl(); String title = resultItems.get("title"); String source = resultItems.get("source"); String publish_time = resultItems.get("publish_time"); String browse_times = resultItems.get("browse_times"); String content = resultItems.get("content"); String type = resultItems.get("type"); String sql = "INSERT INTO t_news " + "(source_url,title,content,source,publish_time,browse_times,type ) VALUES ( '" + // source_url.replace("'","\\\'") + "','" + // title.replace("'",'" + // content.replace("'",'" + // source.replace("'",'" + // publish_time.replace("'"," + // browse_times.replace("'","\\\'") + ",'" + // type.replace("'","\\\'") + "' );"; System.out.println(sql); JdbcUtil.executesql(sql); } }
package com.lacerta.util;

import java.io.IOException;
import java.io.InputStream;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.sql.Connection;
import java.sql.Date;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Statement;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Properties;

/**
 * Small JDBC helper that reads its connection settings from the classpath resource
 * {@code /driver.properties} (keys {@code driver}, {@code url}, {@code username},
 * {@code password}) and executes update statements on a fresh connection per call.
 */
public class JdbcUtil {

    private static final String driver;
    private static final String url;
    private static final String username;
    private static final String password;

    // Static initializer: read the settings once and load the JDBC driver class.
    static {
        Properties prop = loadProperties();
        driver = prop.getProperty("driver");
        url = prop.getProperty("url");
        username = prop.getProperty("username");
        password = prop.getProperty("password");
        if (driver != null) {
            try {
                Class.forName(driver);
            } catch (ClassNotFoundException e) {
                // Best effort, as before: report the problem and let getConn() fail
                // later if no suitable driver ends up registered.
                e.printStackTrace();
            }
        }
    }

    /**
     * Loads {@code /driver.properties} from the classpath and closes the stream.
     *
     * @throws IllegalStateException if the resource is missing or cannot be read
     */
    private static Properties loadProperties() {
        Properties prop = new Properties();
        try (InputStream is = JdbcUtil.class.getResourceAsStream("/driver.properties")) {
            if (is == null) {
                throw new IllegalStateException("driver.properties not found on the classpath");
            }
            prop.load(is);
        } catch (IOException e) {
            throw new IllegalStateException("Failed to read driver.properties", e);
        }
        return prop;
    }

    /**
     * Executes a complete update statement (INSERT/UPDATE/DELETE/DDL).
     * A {@link SQLException} is reported via its stack trace and not propagated.
     *
     * @param sql the statement text, without parameter placeholders
     */
    public static void executesql(String sql) {
        executesql(sql, (Object[]) null);
    }

    /**
     * Executes an update statement with {@code ?} placeholders bound to the given
     * values, in order. Prefer this overload whenever values come from untrusted input.
     * A {@link SQLException} is reported via its stack trace and not propagated.
     *
     * @param sql    the statement text, with one {@code ?} per parameter
     * @param params values to bind, first value to the first placeholder; may be
     *               {@code null} or empty when the statement has no placeholders
     */
    public static void executesql(String sql, Object... params) {
        // try-with-resources closes the statement first, then the connection, even
        // when preparing or executing the statement fails.
        try (Connection conn = getConn();
                PreparedStatement ps = conn.prepareStatement(sql)) {
            if (params != null) {
                for (int i = 0; i < params.length; i++) {
                    ps.setObject(i + 1, params[i]); // JDBC parameter indexes start at 1
                }
            }
            ps.executeUpdate();
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }

    /**
     * Opens a new connection using the configured URL and credentials.
     *
     * @throws SQLException if the connection cannot be established; thrown instead of
     *                      returning {@code null} so callers never dereference a
     *                      missing connection
     */
    private static Connection getConn() throws SQLException {
        return DriverManager.getConnection(url, username, password);
    }
}
driver.properties
driver=com.mysql.cj.jdbc.Driver
url=jdbc:mysql://127.0.0.1:3306/tableName?serverTimezone=UTC
username={你自己的用户名}
password={你自己的密码}
## 如果Driver使用的是com.mysql.cj.jdbc包下的,url后要加上 ?serverTimezone=UTC
MySQL驱动包的Maven依赖:
<dependency>
    <groupId>mysql</groupId>
    <artifactId>mysql-connector-java</artifactId>
    <version>6.0.4</version>
</dependency>