view page source (of a webpage)
-
Hi all! I want to retrieve the page source of a hyperlink. This is my code:
public class SourceViewer{
/**
 * Fetches a hard-coded URL over HTTP and prints the status line, the
 * response headers and the response body to standard output.
 *
 * @param args command-line arguments (unused)
 * @throws IOException declared for compatibility; I/O failures while
 *         talking to the server are caught and reported on standard error
 */
public static void main(String[] args) throws IOException {
    System.out.print("Enter url of local for viewing html source code: ");
    // Reading the URL from stdin is currently disabled; the URL is hard-coded.
    // BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
    // String url = br.readLine();
    String url = "http://vdict.com/Hello,1,0,0.html";
    HttpURLConnection uc = null;
    try {
        URL u = new URL(url);
        uc = (HttpURLConnection) u.openConnection();
        // NOTE(review): with the default Java user agent this server was
        // observed to return an XHTML Mobile page rather than the page a
        // browser shows. Presumably it inspects User-Agent; sending a
        // browser-like value is intended to get the desktop page - confirm.
        // Must be set before the connection is opened by getResponseCode().
        uc.setRequestProperty("User-Agent", "Mozilla/5.0");

        int code = uc.getResponseCode();
        String response = uc.getResponseMessage();
        System.out.println("HTTP/1.x " + code + " " + response);

        // Header index 0 is skipped, as in the original loop; iteration
        // stops at the first index that has no key or no value.
        for (int j = 1; ; j++) {
            String header = uc.getHeaderField(j);
            String key = uc.getHeaderFieldKey(j);
            if (header == null || key == null) {
                break;
            }
            System.out.println(key + ": " + header);
        }

        // Decode explicitly as UTF-8 (the charset this server declares in
        // Content-Type) instead of relying on the platform default charset.
        Reader r = new InputStreamReader(
                new BufferedInputStream(uc.getInputStream()), "UTF-8");
        try {
            int c;
            while ((c = r.read()) != -1) {
                System.out.print((char) c);
            }
        } finally {
            // Always release the stream, even if reading fails midway.
            r.close();
        }
    } catch (MalformedURLException ex) {
        System.err.println(url + " is not a valid URL.");
    } catch (IOException ie) {
        // Reported on stderr, consistent with the invalid-URL case above.
        System.err.println("Input/Output Error: " + ie.getMessage());
    } finally {
        if (uc != null) {
            uc.disconnect();
        }
    }
}
}
It works well with some links, but there is a problem with this link: http://vdict.com/Hello,1,0,0.html. These are the results from the browser and from my program: - Browser: you can see for yourself. - My program:
Date: Fri, 23 Oct 2009 03:53:22 GMT
Server: Apache/2.2.3 (Red Hat)
X-Powered-By: PHP/5.1.6
Expires: Fri, 30 Oct 2009 03:53:22 GMT
Cache-Control: max-age=360000, must-revalidate
Pragma: public
Last-Modified: Fri, 23 Oct 2009 03:53:22 GMT
Vary: Accept-Encoding
Content-Length: 2102
Content-Type: text/html; charset=UTF-8
Set-Cookie: PHPSESSID=4hgviiktc0gc38aiotd8s6jn37; path=/
Connection: Close
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//WAPFORUM//DTD XHTML Mobile 1.0//EN" "http://www.wapforum.org/DTD/xhtml-mobile10.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv='Content-Type' content='text/html; charset=UTF-8' />
<meta name='description' content='Vie -
Hi all! I want to retrieve the page source of a hyperlink. This is my code:
public class SourceViewer{
/**
 * Fetches a hard-coded URL over HTTP and prints the status line, the
 * response headers and the response body to standard output.
 *
 * @param args command-line arguments (unused)
 * @throws IOException declared for compatibility; I/O failures while
 *         talking to the server are caught and reported on standard error
 */
public static void main(String[] args) throws IOException {
    System.out.print("Enter url of local for viewing html source code: ");
    // Reading the URL from stdin is currently disabled; the URL is hard-coded.
    // BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
    // String url = br.readLine();
    String url = "http://vdict.com/Hello,1,0,0.html";
    HttpURLConnection uc = null;
    try {
        URL u = new URL(url);
        uc = (HttpURLConnection) u.openConnection();
        // NOTE(review): with the default Java user agent this server was
        // observed to return an XHTML Mobile page rather than the page a
        // browser shows. Presumably it inspects User-Agent; sending a
        // browser-like value is intended to get the desktop page - confirm.
        // Must be set before the connection is opened by getResponseCode().
        uc.setRequestProperty("User-Agent", "Mozilla/5.0");

        int code = uc.getResponseCode();
        String response = uc.getResponseMessage();
        System.out.println("HTTP/1.x " + code + " " + response);

        // Header index 0 is skipped, as in the original loop; iteration
        // stops at the first index that has no key or no value.
        for (int j = 1; ; j++) {
            String header = uc.getHeaderField(j);
            String key = uc.getHeaderFieldKey(j);
            if (header == null || key == null) {
                break;
            }
            System.out.println(key + ": " + header);
        }

        // Decode explicitly as UTF-8 (the charset this server declares in
        // Content-Type) instead of relying on the platform default charset.
        Reader r = new InputStreamReader(
                new BufferedInputStream(uc.getInputStream()), "UTF-8");
        try {
            int c;
            while ((c = r.read()) != -1) {
                System.out.print((char) c);
            }
        } finally {
            // Always release the stream, even if reading fails midway.
            r.close();
        }
    } catch (MalformedURLException ex) {
        System.err.println(url + " is not a valid URL.");
    } catch (IOException ie) {
        // Reported on stderr, consistent with the invalid-URL case above.
        System.err.println("Input/Output Error: " + ie.getMessage());
    } finally {
        if (uc != null) {
            uc.disconnect();
        }
    }
}
}
It works well with some links, but there is a problem with this link: http://vdict.com/Hello,1,0,0.html. These are the results from the browser and from my program: - Browser: you can see for yourself. - My program:
Date: Fri, 23 Oct 2009 03:53:22 GMT
Server: Apache/2.2.3 (Red Hat)
X-Powered-By: PHP/5.1.6
Expires: Fri, 30 Oct 2009 03:53:22 GMT
Cache-Control: max-age=360000, must-revalidate
Pragma: public
Last-Modified: Fri, 23 Oct 2009 03:53:22 GMT
Vary: Accept-Encoding
Content-Length: 2102
Content-Type: text/html; charset=UTF-8
Set-Cookie: PHPSESSID=4hgviiktc0gc38aiotd8s6jn37; path=/
Connection: Close
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//WAPFORUM//DTD XHTML Mobile 1.0//EN" "http://www.wapforum.org/DTD/xhtml-mobile10.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv='Content-Type' content='text/html; charset=UTF-8' />
<meta name='description' content='Vie