public partial class Form1 : Form
{
int y = 0;
string url = @"http://www.google.co.il";
string urls = @"http://www.bing.com/images/search?q=cat&go=&form=QB&qs=n";
public Form1()
{
InitializeComponent();
//webCrawler(urls, 3);
List<string> a = webCrawler(urls, 1);
//GetAllImages();
}
private int factorial(int n)
{
if (n == 0) return 1;
else y = n * factorial(n - 1);
listBox1.Items.Add(y);
return y;
}
private List<string> getLinks(HtmlAgilityPack.HtmlDocument document)
{
List<string> mainLinks = new List<string>();
if (document.DocumentNode.SelectNodes("//a[@href]") == null)
{ }
foreach (HtmlNode link in document.DocumentNode.SelectNodes("//a[@href]"))
{
var href = link.Attributes["href"].Value;
mainLinks.Add(href);
}
return mainLinks;
}
private List<string> webCrawler(string url, int levels)
{
HtmlAgilityPack.HtmlDocument doc;
HtmlWeb hw = new HtmlWeb();
List<string> webSites;// = new List<string>();
List<string> csFiles = new List<string>();
csFiles.Add("temp string to know that something is happening in level = " + levels.ToString());
csFiles.Add("current site name in this level is : "+url);
/* later should be replaced with real cs files .. cs files links..*/
doc = hw.Load(url);
webSites = getLinks(doc);
if (levels == 0)
{
return csFiles;
}
else
{
int actual_sites = 0;
for (int i = 0; i < webSites.Count() && i< 100000; i++) // limiting ourseleves for 20 sites for each level for now..
//or it will take forever.
{
string t = webSites[i];
/*
if (!webSites.Contains(t))
{
webCrawler(t, levels - 1);
}
*/
if ( (t.StartsWith("http://")==true) || (t.StartsWith("https://")==true) ) // replace this with future FilterJunkLinks function
{
actual_sites++;
csFiles.AddRange(webCrawler(t, levels - 1));
richTextBox1.Text += t + Environment.NewLine;
}
}
// report to a message box only at high levels..
if (levels==1)
MessageBox.Show(actual_sites.ToString());
return csFiles;
}
}
少数网站被送至getLinks
功能之后,便将这一例外情况推向了。
The exception is in the getLinks function on the line:
foreach (HtmlNode link in document.DocumentNode.SelectNodes("//a[@href]"))
不针对物体的事例提出反对
我试图利用国际红十字与红新月联会来检查其失效后,我是否填写了return mainLinks;
。
但是,如果我这样做的话,我就不从网站上获取所有链接。
如今,在构造中使用尿素的Im(www.google.co.il
) 在几秒之后,我就获得同样的例外。
我可以说明为什么这一例外正在消失。 是否存在这一例外的理由?
System.NullReferenceException was unhandled
Message=不针对物体的事例提出反对.
Source=GatherLinks
StackTrace:
at GatherLinks.Form1.getLinks(HtmlDocument document) in D:C-SharpGatherLinksGatherLinksGatherLinksForm1.cs:line 55
at GatherLinks.Form1.webCrawler(String url, Int32 levels) in D:C-SharpGatherLinksGatherLinksGatherLinksForm1.cs:line 76
at GatherLinks.Form1.webCrawler(String url, Int32 levels) in D:C-SharpGatherLinksGatherLinksGatherLinksForm1.cs:line 104
at GatherLinks.Form1..ctor() in D:C-SharpGatherLinksGatherLinksGatherLinksForm1.cs:line 29
at GatherLinks.Program.Main() in D:C-SharpGatherLinksGatherLinksGatherLinksProgram.cs:line 18
at System.AppDomain._nExecuteAssembly(Assembly assembly, String[] args)
at System.AppDomain.ExecuteAssembly(String assemblyFile, Evidence assemblySecurity, String[] args)
at Microsoft.VisualStudio.HostingProcess.HostProc.RunUsersAssembly()
at System.Threading.ThreadHelper.ThreadStart_Context(Object state)
at System.Threading.ExecutionContext.Run(ExecutionContext executionContext, ContextCallback callback, Object state)
at System.Threading.ThreadHelper.ThreadStart()