抓取历年天气

前端之家收集整理的这篇文章主要介绍了抓取历年天气,前端之家小编觉得挺不错的,现在分享给大家,也给大家做个参考。

使用goquery抓取天气的demo。数据量有点多。目前按省份存储天气数据。存储到csv文件中。


package main

import (
	"code.google.com/p/mahonia"
	"encoding/csv"
	"fmt"
	"github.com/PuerkitoBio/goquery"
	"net/http"
	"os"
	"strings"
	"time"
)

// Verbosity levels understood by simpleLogger, from most to least verbose.
const (
	levelDebug = iota
	levelInfo
)

// simpleLogger is a minimal local replacement for the external "loger"
// package, which this file referenced without importing. It offers the two
// calls the rest of the file makes, Debug and Log, and writes to standard
// error.
type simpleLogger struct {
	// Level is the lowest level that is printed; levelDebug prints everything.
	Level int
}

// Debug prints args on one line when the logger is at debug level.
func (l simpleLogger) Debug(args ...interface{}) {
	if l.Level <= levelDebug {
		fmt.Fprintln(os.Stderr, args...)
	}
}

// Log prints args on one line regardless of the configured level.
func (l simpleLogger) Log(args ...interface{}) {
	fmt.Fprintln(os.Stderr, args...)
}

// log is the file-wide logger; it is configured to print debug messages.
var log = simpleLogger{Level: levelDebug}

// YEAR is the year that appears in the name of every generated CSV file.
const YEAR = 2013

// SleepTime is the pause between two monthly page requests, in milliseconds.
const SleepTime = 100

// main downloads the weather history of every city returned by GetCity, one
// province at a time, and hands each province's rows to SaveToCSV. A province
// whose CSV file already exists is skipped, so an interrupted run can be
// restarted without fetching finished provinces again.
func main() {
	provinceCities, cityPaths := GetCity()

	// One client is shared by every request so connections can be reused; the
	// timeout keeps a stalled server from hanging the whole run.
	client := &http.Client{Timeout: 30 * time.Second}

	for province, cities := range provinceCities {
		filePath := fmt.Sprintf("%d%s.csv", YEAR, province)
		if _, err := os.Stat(filePath); err == nil {
			continue
		}

		// Start empty: a slice created with a non-zero length would carry nil
		// rows in front of the appended ones.
		var rows []*WeaterInfo
		for _, city := range cities {
			path, ok := cityPaths[city]
			if !ok || path == "" {
				// No link is known for this entry, so there is nothing to fetch.
				continue
			}
			log.Debug("get ", province, city)
			for _, row := range GetWeather(client, province, path) {
				if row == nil {
					continue
				}
				// Only this loop knows both names, so label the row here.
				row.Province = province
				row.City = city
				rows = append(rows, row)
			}
		}

		// SaveToCSV logs its own failures; a failed province is retried on the
		// next run because its file will not exist.
		_ = SaveToCSV(province, rows)
	}
}

// GetCity downloads the history index page and extracts the city list.
//
// It returns sc, which maps a province name to the names of its cities, and
// cc, which maps a city name to the href of its history page with ".html"
// removed. If the request or the parsing fails, the error is logged and both
// maps are nil.
func GetCity() (sc map[string][]string, cc map[string]string) {
	url := "http://www.tianqihoubao.com/lishi/"
	request, err := http.NewRequest("GET", url, nil)
	if err != nil {
		log.Log(err)
		return
	}
	request.Header.Add("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML,like Gecko) Ubuntu Chromium/39.0.2171.65 Chrome/39.0.2171.65 Safari/537.36")
	request.Header.Add("referer", "http://www.tianqihoubao.com/")
	resp, err := http.DefaultClient.Do(request)
	if err != nil {
		log.Log(err)
		return
	}
	if resp.StatusCode != http.StatusOK {
		// An error page would parse without complaint and yield empty maps, so
		// reject it explicitly.
		resp.Body.Close()
		log.Log(fmt.Errorf("GET %s: unexpected status %s", url, resp.Status))
		return
	}
	// NewDocumentFromResponse closes the response body before returning.
	document, err := goquery.NewDocumentFromResponse(resp)
	if err != nil {
		log.Log(err)
		return
	}

	gbk := mahonia.NewDecoder("gbk")
	sc = make(map[string][]string)
	cc = make(map[string]string)
	document.Find(".citychk").Find("dl").Each(func(_ int, group *goquery.Selection) {
		province := gbk.ConvertString(group.Find("dt").Find("b").Text())
		// Length 0 with room for 20: a non-zero length would leave empty city
		// names in front of the real ones.
		cities := make([]string, 0, 20)
		group.Find("dd").Find("a").Each(func(_ int, link *goquery.Selection) {
			href, exists := link.Attr("href")
			if !exists {
				return
			}
			name := gbk.ConvertString(link.Text())
			cities = append(cities, name)
			cc[name] = strings.Replace(href, ".html", "", -1)
		})
		sc[province] = cities
	})
	return
}

// WeaterInfo is one row of scraped weather data. Every field is kept as text
// and the fields appear in the same order as the CSV columns.
type WeaterInfo struct {
	Province, City string // province and city the row belongs to
	Date           string // date
	Info           string // weather condition
	Temp           string // temperature
	Wind           string // wind force and direction
}

// GetWeather fetches the twelve monthly history pages of YEAR for one city
// and returns all of their rows in month order.
//
// client is passed on to GetWeatherInfo for every request. province is
// written into the Province field of each row that does not have one yet.
// name is the city's path on the site (for example a value of the city map
// returned by GetCity) and is placed directly after the host name in the URL.
// The function sleeps SleepTime milliseconds after each month.
func GetWeather(client *http.Client, province, name string) []*WeaterInfo {
	// Length 0 with room for a year: a non-zero length would leave nil rows in
	// front of the appended ones.
	rows := make([]*WeaterInfo, 0, 380)
	for month := 1; month <= 12; month++ {
		url := fmt.Sprintf("http://www.tianqihoubao.com%s/month/%d%02d.html", name, YEAR, month)
		for _, row := range GetWeatherInfo(client, url) {
			if row == nil {
				continue
			}
			if row.Province == "" {
				row.Province = province
			}
			rows = append(rows, row)
		}
		time.Sleep(time.Millisecond * SleepTime)
	}
	return rows
}

// GetWeatherInfo downloads one monthly history page and returns one row per
// table line after the header line.
//
// client performs the request; http.DefaultClient is used when it is nil.
// url is the address of the page. Each returned row has Date, Info, Temp and
// Wind filled from the first four cells of its table line. Province and City
// are left empty because this function receives neither; the caller is
// expected to set them. On a request or parse failure the error is logged and
// nil is returned.
func GetWeatherInfo(client *http.Client, url string) (weaterInfos []*WeaterInfo) {
	if client == nil {
		client = http.DefaultClient
	}
	request, err := http.NewRequest("GET", url, nil)
	if err != nil {
		log.Log(err)
		return
	}
	// Same headers as the index page request in GetCity.
	request.Header.Add("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML,like Gecko) Ubuntu Chromium/39.0.2171.65 Chrome/39.0.2171.65 Safari/537.36")
	request.Header.Add("referer", "http://www.tianqihoubao.com/")
	resp, err := client.Do(request)
	if err != nil {
		log.Log(err)
		return
	}
	if resp.StatusCode != http.StatusOK {
		// An error page would parse without complaint and yield no rows, so
		// report it instead.
		resp.Body.Close()
		log.Log(fmt.Errorf("GET %s: unexpected status %s", url, resp.Status))
		return
	}
	// NewDocumentFromResponse closes the response body before returning.
	document, err := goquery.NewDocumentFromResponse(resp)
	if err != nil {
		log.Log(err)
		return
	}

	gbk := mahonia.NewDecoder("gbk")
	// Length 0 with room for a month: a non-zero length would leave nil rows
	// in front of the appended ones.
	weaterInfos = make([]*WeaterInfo, 0, 31)
	document.Find("#content").Find("tbody").Find("tr").Each(func(line int, tr *goquery.Selection) {
		// The first table line is the header.
		if line == 0 {
			return
		}
		row := &WeaterInfo{}
		tr.Find("td").Each(func(col int, td *goquery.Selection) {
			switch col {
			case 0:
				row.Date = gbk.ConvertString(td.Find("a").Text())
			case 1:
				row.Info = gbk.ConvertString(td.Text())
			case 2:
				row.Temp = gbk.ConvertString(td.Text())
			case 3:
				row.Wind = gbk.ConvertString(td.Text())
			}
		})
		weaterInfos = append(weaterInfos, row)
	})
	return
}

// SaveToCSV writes weatherInfos to the file "<YEAR><file>.csv" in the current
// directory.
//
// The file starts with a UTF-8 byte order mark and a header line, followed by
// one line per row with every field passed through TrimSpace. Nil rows are
// skipped. If the file already exists nothing is written and nil is returned.
// Any create, write, flush or close failure is logged and returned.
func SaveToCSV(file string, weatherInfos []*WeaterInfo) (err error) {
	filePath := fmt.Sprintf("%d%s.csv", YEAR, file)
	if _, statErr := os.Stat(filePath); statErr == nil {
		return nil
	}

	f, err := os.Create(filePath)
	if err != nil {
		log.Log(err)
		return err
	}
	defer func() {
		// A failed close can mean lost data, so report it unless an earlier
		// error is already being returned.
		if closeErr := f.Close(); closeErr != nil && err == nil {
			log.Log(closeErr)
			err = closeErr
		}
	}()

	// UTF-8 byte order mark.
	if _, err = f.WriteString("\xEF\xBB\xBF"); err != nil {
		log.Log(err)
		return err
	}

	w := csv.NewWriter(f)
	if err = w.Write([]string{"省份", "城市", "日期", "天气状况", "气温", "风力风向"}); err != nil {
		log.Log(err)
		return err
	}
	for _, info := range weatherInfos {
		if info == nil {
			continue
		}
		record := []string{
			TrimSpace(info.Province),
			TrimSpace(info.City),
			TrimSpace(info.Date),
			TrimSpace(info.Info),
			TrimSpace(info.Temp),
			TrimSpace(info.Wind),
		}
		if err = w.Write(record); err != nil {
			log.Log(err)
			return err
		}
	}

	// The writer buffers internally; Flush pushes the remainder to the file and
	// Error reports any failure seen while writing or flushing.
	w.Flush()
	if err = w.Error(); err != nil {
		log.Log(err)
	}
	return err
}

// spaceStripper deletes every newline and every space character.
var spaceStripper = strings.NewReplacer("\n", "", " ", "")

// TrimSpace returns value with all newline and space characters removed,
// including those in the middle of the string.
func TrimSpace(value string) string {
	return spaceStripper.Replace(value)
}

日志库删掉了,因为看起来有点不是很好。当然,也没有说这个代码好,只是临时写的东西。

猜你在找的Go相关文章