网页的网址为http://www.bseindia.com/corporates/Insider_Trading_new.aspx?expandable=0
这看起来像一个非常普通的网站,但由于它是一个aspx页面,涉及ViewState和事件验证,因此预计这不会很容易.
我的第一步是使用Fiddler分析网站的GET和POST,这让我感到惊讶,因为Fiddler没有为这个网址捕获任何流量.我试过查尔斯,但这本身并没有抓住这个网址.除此之外,这个Url Fiddler和Charles都在捕捉其他一切.我还想提一下,当我使用HttpWebRequest从控制台应用程序调用Url时,Fiddler和Charles都捕获了它,但他们没有从Chrome,FireFox和Internet Explorer 11捕获它.
因此,我使用FireFox中的开发人员工具分析了网络活动,一切都是可见的,包括(标题,参数和Cookie).在Chrome中没有Cookie存在.当我通过创建HttpWebRequest检查cookie并得到响应时,没有cookie存在.所以这个网站真的很奇怪.
我以某种方式设法创建一个简单的函数来创建请求并获得响应.我正在做的是我首先创建一个GET请求并获取网站字符串并从中提取Viewstate,EventValidation等.我使用此信息在第二个HttpWebRequest中使用,这是一个帖子.现在一切正常,我收到的回复却不如预期.我想要两个给出日期之间的记录,我已经在表单数据中指定了这些日期,但POST请求仍然不返回过滤后的数据.我已经提到了我在下面创建的函数,我将非常感谢为什么会发生这种情况以及如何处理它的任何建议.要理解这一点对我来说已经成为一个挑战,因为我无法理解为什么这个简单的网站没有出现在Fiddler中. (这使用Javascript Postback)
代码可能看起来很长而且可怕,但它非常简单直接.
Try ' First GET Request to obtain Viewstate,Eventvalidation etc Dim objRequest2 As Net.HttpWebRequest = DirectCast(HttpWebRequest.Create("http://www.bseindia.com/corporates/Insider_Trading_new.aspx?expandable=0"),HttpWebRequest) objRequest2.Method = "GET" objRequest2.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8" objRequest2.Headers.Add("Accept-Encoding","gzip,deflate") objRequest2.Headers.Add("Accept-Language","en-GB,en-US;q=0.8,en;q=0.6,ur;q=0.4") objRequest2.KeepAlive = True objRequest2.ContentType = "application/x-www-form-urlencoded" objRequest2.Host = "www.bseindia.com" objRequest2.UserAgent = "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML,like Gecko) Chrome/47.0.2526.106 Safari/537.36" objRequest2.AutomaticDecompression = DecompressionMethods.Deflate Or DecompressionMethods.GZip Dim LoginRes2 As Net.HttpWebResponse Dim sr2 As IO.StreamReader LoginRes2 = objRequest2.GetResponse() sr2 = New IO.StreamReader(LoginRes2.GetResponseStream) Dim getString As String = sr2.ReadToEnd() Dim getCookieCollection = objRequest2.CookieContainer ' get the page ViewState Dim viewStateFlag As String = "id=""__VIEWSTATE"" value=""" Dim i As Integer = getString.IndexOf(viewStateFlag) + viewStateFlag.Length Dim j As Integer = getString.IndexOf("""",i) Dim viewState As String = getString.Substring(i,j - i) ' get page EventValidation Dim eventValidationFlag As String = "id=""__EVENTVALIDATION"" value=""" i = getString.IndexOf(eventValidationFlag) + eventValidationFlag.Length j = getString.IndexOf("""",i) Dim eventValidation As String = getString.Substring(i,j - i) ' get page EventValidation Dim viewstateGeneratorFlag As String = "id=""__VIEWSTATEGENERATOR"" value=""" i = getString.IndexOf(viewstateGeneratorFlag) + viewstateGeneratorFlag.Length j = getString.IndexOf("""",i) Dim viewStateGenerator As String = getString.Substring(i,j - i) viewState = System.Web.HttpUtility.UrlEncode(viewState) eventValidation = System.Web.HttpUtility.UrlEncode(eventValidation) Dim LoginRes As Net.HttpWebResponse Dim sr As IO.StreamReader Dim objRequest As Net.HttpWebRequest ' Second POST request to post the form data along with cookies objRequest = DirectCast(HttpWebRequest.Create("http://www.bseindia.com/corporates/Insider_Trading_new.aspx?expandable=0"),HttpWebRequest) Dim formDataCollection As New NameValueCollection formDataCollection.Add("__EVENTTARGET","") formDataCollection.Add("__EVENTARGUMENT","") formDataCollection.Add("__VIEWSTATE",viewState) formDataCollection.Add("__VIEWSTATEGENERATOR",viewStateGenerator) formDataCollection.Add("__EVENTVALIDATION",eventValidation) formDataCollection.Add("fmdate","20160104") formDataCollection.Add("eddate","20160204") formDataCollection.Add("hidCurrentDate","2016/02/04") formDataCollection.Add("ctl00_ContentPlaceHolder1_hdnCode","") formDataCollection.Add("txtDate","04/01/2016") formDataCollection.Add("ddlCalMonthDiv3","1") formDataCollection.Add("ddlCalYearDiv3","2016") formDataCollection.Add("txtTodate","04/02/2016") formDataCollection.Add("ddlCalMonthDiv4","2") formDataCollection.Add("ddlCalYearDiv4","2016") formDataCollection.Add("Hidden1","") formDataCollection.Add("ctl00_ContentPlaceHolder1_GetQuote1_smartSearch","Enter Security Name / Code / ID") formDataCollection.Add("btnSubmit.x","44") formDataCollection.Add("btnSubmit.y","2") Dim strFormdata As String = formDataCollection.ToString() Dim encoding As New ASCIIEncoding Dim postBytes As Byte() = encoding.GetBytes(strFormdata) objRequest.Method = "POST" objRequest.Accept = "text/html,*/*;q=0.8" objRequest.Headers.Add("Accept-Encoding",deflate") objRequest.Headers.Add("Accept-Language",ur;q=0.4") objRequest.Headers.Add("Cache-Control","private,max-age=60") objRequest.KeepAlive = True objRequest.ContentType = "application/x-www-form-urlencoded" objRequest.Host = "www.bseindia.com" objRequest.Headers.Add("Origin","http://www.bseindia.com") objRequest.Referer = "http://www.bseindia.com/corporates/Insider_Trading_new.aspx?expandable=0" objRequest.Headers.Add("Upgrade-Insecure-Requests","1") objRequest.UserAgent = "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML,like Gecko) Chrome/47.0.2526.106 Safari/537.36" objRequest.ContentType = "text/html; charset=utf-8" objRequest.Date = "Thu,04 Feb 2016 13:42:04 GMT" objRequest.Headers.Add("Server","Microsoft-IIS/8.0") objRequest.Headers.Add("Vary","Accept-Encoding") objRequest.Headers.Add("X-AspNet-Version","2.0.50727") objRequest.Headers.Add("ASP.NET","ASP.NET") objRequest.AutomaticDecompression = DecompressionMethods.Deflate Or DecompressionMethods.GZip Dim gaCookies As New CookieContainer() Dim cookie1 As New Cookie("__asc","f673f0d5152a823bc335f575d34") cookie1.Domain = ".bseindia.com" cookie1.Path = "/" gaCookies.Add(cookie1) Dim cookie2 As New Cookie("__auc","f673f0d5152a823bc335f575d34") cookie2.Domain = ".bseindia.com" cookie2.Path = "/" gaCookies.Add(cookie2) Dim cookie3 As New Cookie("__utma","253454874.280640365.1454519857.1454519865.1454519865.1") cookie3.Domain = ".bseindia.com" cookie3.Path = "/" gaCookies.Add(cookie3) Dim cookie4 As New Cookie("__utmb","253454874.1.10.1454519865") cookie4.Domain = ".bseindia.com" cookie4.Path = "/" gaCookies.Add(cookie4) Dim cookie5 As New Cookie("__utmc","253454874") cookie5.Domain = ".bseindia.com" cookie5.Path = "/" gaCookies.Add(cookie5) Dim cookie6 As New Cookie("__utmt","1") cookie6.Domain = ".bseindia.com" cookie6.Path = "/" gaCookies.Add(cookie6) Dim cookie7 As New Cookie("__utmz","253454874.1454519865.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)") cookie7.Domain = ".bseindia.com" cookie7.Path = "/" gaCookies.Add(cookie7) Dim cookie8 As New Cookie("_ga","GA1.2.280640365.1454519857") cookie8.Domain = ".bseindia.com" cookie8.Path = "/" gaCookies.Add(cookie8) Dim cookie9 As New Cookie("_gat","1") cookie9.Domain = ".bseindia.com" cookie9.Path = "/" gaCookies.Add(cookie9) Dim postStream As Stream = objRequest.GetRequestStream() postStream.Write(postBytes,postBytes.Length) postStream.Flush() postStream.Close() LoginRes = objRequest.GetResponse() sr = New IO.StreamReader(LoginRes.GetResponseStream) ReadWebsite = sr.ReadToEnd() sr.Close() sr = Nothing LoginRes.Close() LoginRes = Nothing objRequest = Nothing Exit Function Catch ex As Exception ReadWebsite = Nothing End Try
注意:(没有viewstate和eventvalidation的日期的原始表单数据)
fmdate:20160130
eddate:20160205
hidCurrentDate:2016年2月5日
ctl00_ContentPlaceHolder1_hdnCode:
txtDate:2016年4月1日
ddlCalMonthDiv3:1
ddlCalYearDiv3 2016
txtTodate:2016年4月2日
ddlCalMonthDiv4:2
ddlCalYearDiv4 2016
Hidden1:
ctl00_ContentPlaceHolder1_GetQuote1_smartSearch:输入安全名称/代码/ ID
btnSubmit.x:55
btnSubmit.y:13