c# 采集源码, 分段采集 全部连接 c#采集类
代码片段和文件信息
using System;
using System.Data;
using System.Configuration;
using System.Web;
using System.Web.Security;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
using System.Web.UI.HtmlControls;
using System.Drawing;
using MSxml2;
using System.Text.Regularexpressions;
/// <summary>
/// Demo page for the <c>GetHrefsu</c> scraping helper. On load it
/// (1) downloads a news list page and writes every link found on it as an
/// absolute URL, and (2) downloads one article page and writes the fragment
/// located between two marker strings.
/// </summary>
public partial class _Default : System.Web.UI.Page
{
    /// <summary>Prefix prepended to links that do not already contain "http://".</summary>
    private const string SiteRoot = "http://www.cfagri.gov.cn";

    /// <summary>List page whose links are collected.</summary>
    private const string ListPageUrl = "http://www.cfagri.gov.cn/GOV/News/CF/default_2.shtml";

    /// <summary>Article page from which a fragment is extracted.</summary>
    private const string ArticleUrl = "http://www.cfagri.gov.cn/gov/news/cf/2010/3/48943.shtml";

    // NOTE(review): the published copy of this snippet had its tag-like string
    // literals stripped, so the real values of the three constants below are
    // unknown. They are placeholders - confirm against the original project
    // before relying on the output.
    private const string ContentStartMarker = "<title>";
    private const string ContentEndMarker = "</title>";
    private const string LineBreak = "<br />";

    /// <summary>
    /// Runs both scraping demos and writes their results to the response.
    /// </summary>
    /// <param name="sender">Event source (unused).</param>
    /// <param name="e">Event data (unused).</param>
    protected void Page_Load(object sender, EventArgs e)
    {
        GetHrefsu o = new GetHrefsu();
        try
        {
            string listHtml = o.GetRemoteHtmlCode(ListPageUrl);
            WriteLinks(o.GetHref(listHtml));

            // The helper is still needed here, so it must not be disposed
            // before this second download.
            string articleHtml = o.GetRemoteHtmlCode(ArticleUrl);
            WriteArticleFragment(articleHtml);
        }
        finally
        {
            // Release the helper exactly once, even if a download or parse step throws.
            o.Dispose();
        }
    }

    /// <summary>
    /// Writes each link of a '|'-separated list as an absolute URL followed by
    /// a line break. Double quotes and surrounding whitespace are removed from
    /// every entry; entries that end up empty are skipped.
    /// </summary>
    /// <param name="joinedLinks">Links joined with '|'; may be null or empty.</param>
    private void WriteLinks(string joinedLinks)
    {
        if (string.IsNullOrEmpty(joinedLinks))
        {
            return;
        }

        string[] links = joinedLinks.Split('|');
        for (int i = 0; i < links.Length; i++)
        {
            string link = links[i].Replace("\"", "").Trim();
            if (link.Length == 0)
            {
                // A leading/trailing/double '|' would otherwise emit a bare site root.
                continue;
            }

            if (link.IndexOf("http://", StringComparison.Ordinal) > -1)
            {
                Response.Write(link + LineBreak);
            }
            else
            {
                Response.Write(SiteRoot + link + LineBreak);
            }
        }
    }

    /// <summary>
    /// Writes the part of <paramref name="html"/> found between the content
    /// markers, with the start marker text removed. Writes nothing when the
    /// markers are not both present.
    /// </summary>
    /// <param name="html">Downloaded page markup; may be null or empty.</param>
    private void WriteArticleFragment(string html)
    {
        string fragment = GetStrBetween(html, ContentStartMarker, ContentEndMarker);
        if (fragment == null)
        {
            return;
        }

        // GetStrBetween keeps the start marker at the head of the fragment;
        // strip it (and any repeats) before output.
        Response.Write(fragment.Replace(ContentStartMarker, ""));
    }

    /// <summary>
    /// Returns the substring of <paramref name="source"/> that begins at the
    /// first occurrence of <paramref name="startStr"/> (inclusive) and ends
    /// just before the next occurrence of <paramref name="endStr"/>.
    /// </summary>
    /// <param name="source">Text to search.</param>
    /// <param name="startStr">Marker that opens the wanted span.</param>
    /// <param name="endStr">Marker that closes the wanted span.</param>
    /// <returns>
    /// The span including the start marker, or <c>null</c> when the input is
    /// empty or either marker cannot be found.
    /// </returns>
    private static string GetStrBetween(string source, string startStr, string endStr)
    {
        if (string.IsNullOrEmpty(source) || string.IsNullOrEmpty(startStr) || string.IsNullOrEmpty(endStr))
        {
            return null;
        }

        // Ordinal comparison: these are markup tokens, not linguistic text.
        int begin = source.IndexOf(startStr, StringComparison.Ordinal);
        if (begin < 0)
        {
            return null;
        }

        int end = source.IndexOf(endStr, begin, StringComparison.Ordinal);
        if (end < 0)
        {
            return null;
        }

        return source.Substring(begin, end - begin);
    }

    /// <summary>
    /// Unimplemented placeholder; always throws.
    /// </summary>
    /// <exception cref="NotImplementedException">Always.</exception>
    private object Instr(string HtmlCode, string p)
    {
        throw new NotImplementedException();
    }
}
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
文件 16253 2010-03-27 16:26 caiji\App_Code\GetHrefsu.cs
文件 86016 2003-10-28 20:00 caiji\Bin\Interop.MSxml2.dll
文件 444 2008-01-11 20:32 caiji\Default.aspx
文件 2919 2010-03-29 09:39 caiji\Default.aspx.cs
文件 688667 2001-09-05 20:00 caiji\msxml2\home\msxml2.dll
文件 169 2007-01-01 11:13 caiji\msxml2\readme.txt
文件 699392 2003-08-01 04:00 caiji\msxml2\win2003\msxml2.dll
文件 699392 2003-10-28 20:00 caiji\msxml2\xp\msxml2.dll
文件 2794 2006-11-29 14:57 caiji\msxml2\说明.htm
文件 837054 2010-03-27 15:47 caiji\msxml2.rar
文件 8068 2010-03-27 16:02 caiji\web.config
目录 0 2007-01-03 10:31 caiji\msxml2\home
目录 0 2007-01-03 10:31 caiji\msxml2\win2003
目录 0 2007-01-03 10:31 caiji\msxml2\xp
目录 0 2010-03-27 16:26 caiji\App_Code
目录 0 2010-03-27 15:30 caiji\App_Data
目录 0 2010-03-27 15:51 caiji\Bin
目录 0 2010-03-27 15:49 caiji\msxml2
目录 0 2010-03-31 11:53 caiji
文件 12 2010-03-31 11:54 caiji\新建 文本文档.txt
----------- --------- ---------- ----- ----
3041180 20
版权声明:本文内容由互联网用户自发贡献,该文观点仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌抄袭侵权/违法违规的内容, 请发送邮件举报,一经查实,本站将立刻删除。
评论列表(条)