C# 远程 HTML 数据抓取实例分享

论坛 期权论坛 脚本     
niminba   2021-5-23 02:54   2288   0

复制代码 代码如下:

/// <summary>
        /// Fetches a remote resource synchronously through the MSXML2 XMLHTTP COM object
        /// and returns the response body as text.
        /// </summary>
        /// <param name="url">Target URL. For GET requests <paramref name="param"/> is appended as the query string.</param>
        /// <param name="methed">HTTP method name (compared case-insensitively; "get" and "post" receive special handling).</param>
        /// <param name="param">URL-encoded parameters; may be null or empty when there is nothing to send.</param>
        /// <param name="html">On success, the response text; on failure, an error message starting with "远程连接失败:".</param>
        /// <returns><c>true</c> when the request completed and the response text was read; otherwise <c>false</c>.</returns>
        public static bool GetHttp(string url, string methed, string param, out string html)
        {
            // Report bad arguments through the normal failure channel instead of
            // letting a NullReferenceException escape to the caller.
            if (string.IsNullOrEmpty(url) || string.IsNullOrEmpty(methed))
            {
                html = "远程连接失败:url or methed is empty";
                return false;
            }

            // Invariant lower-casing: the method name is a protocol token, not user-facing text.
            methed = methed.ToLowerInvariant();
            bool hasParam = !string.IsNullOrEmpty(param);

            if (hasParam && methed == "get")
            {
                // Use "&" when the URL already carries a query string, "?" otherwise.
                url += (url.IndexOf('?') >= 0 ? "&" : "?") + param;
            }

            MSXML2.XMLHTTP mx = null;
            try
            {
                mx = new MSXML2.XMLHTTPClass();

                // Third argument false => synchronous request; send() blocks until completion.
                mx.open(methed, url, false, null, null);

                if (hasParam && methed == "post")
                {
                    // XMLHTTP transmits string bodies as UTF-8, so the length must be the
                    // UTF-8 byte count, not the UTF-16 character count.
                    int bodyBytes = System.Text.Encoding.UTF8.GetByteCount(param);
                    mx.setRequestHeader("Content-Length", bodyBytes.ToString(System.Globalization.CultureInfo.InvariantCulture));
                    mx.setRequestHeader("Content-Type", "application/x-www-form-urlencoded");
                }

                mx.send(param);

                // readyState 4 means the request has completed.
                if (mx.readyState != 4)
                {
                    html = "远程连接失败:-4";
                    return false;
                }

                html = mx.responseText;
                return true;
            }
            catch (Exception ex)
            {
                html = "远程连接失败:" + ex.Message;
                return false;
            }
            finally
            {
                // Release the COM object deterministically instead of waiting for the GC.
                if (mx != null)
                {
                    System.Runtime.InteropServices.Marshal.ReleaseComObject(mx);
                }
            }
        }

        // Fetches a remote page and returns its body through `html`, decoded with the
        // encoding named by `encode`. Returns false (with an error message in `html`)
        // when an exception is caught, true otherwise.
        // NOTE(review): this snippet is truncated in the middle — the declaration of
        // `webreq`, the opening of the `try` block and several closing braces are not
        // present in the visible text, so it does not compile as shown. `referer` is not
        // used in the visible part; presumably it is applied in the missing section — confirm.
        public static bool GetHttp1(string url, string methed, string param, string referer, string encode, out string html)
        {
            //return GetHttp(url,methed,param,out html);

            //string encode = "utf-8";
            //string methed = sendType.ToString();

            // For GET requests with parameters, extend the existing query string with "&".
            if (param != null && methed == "get" && param.Length > 0)
            {
                if (url.IndexOf("?") >= 0)
                {
                    url += "&" + param;
                 // NOTE(review): source text is missing here (rest of the GET branch,
                 // request creation, POST body handling).
                 // end of POST handling

 
                // NOTE(review): `w` is not used in the visible code and the response is
                // requested again just below — confirm whether this line is needed.
                WebResponse w = webreq.GetResponse();

                // Read the HTML response
                using (HttpWebResponse webres = (HttpWebResponse)webreq.GetResponse())
                {
                    using (Stream dataStream = webres.GetResponseStream())
                    {
                        using (StreamReader reader = new StreamReader(dataStream, Encoding.GetEncoding(encode)))
                        {
                            html = reader.ReadToEnd();
                            //this.cookieList = webreq.CookieContainer.GetCookies(webreq.RequestUri);
                            webreq.Abort();// may help with hanging or blocking issues
                        }
                    }
                }
            }
            catch (Exception ex)
            {

                // Report the failure, including the URL, through the out parameter.
                html = "出现异常(HttpHelper.GetHTML),远程连接失败:" + ex.Message + " url:" + url;
                //System.Windows.Forms.MessageBox.Show(html);
                return false;
            }

            return true;
        }
分享到 :
0 人收藏
您需要登录后才可以回帖 登录 | 立即注册

本版积分规则

积分:1060120
帖子:212021
精华:0
期权论坛 期权论坛
发布
内容

下载期权论坛手机APP