CInternetSession OpenURL
-
That is an error message. The server was not able to answer the request. If you enter that address in your browser you should have noticed that it redirects to an ASPX page. A possible reason might be that the server script tries to access a request header which is not present and the code does not handle that. Candidates are (among others)
User-Agent, Content-Type, and Accept. But ultimately only the administrator of that web site can tell you. -
OK, and if I know them (User-Agent, Content-Type, etc.), how can I use them in the OpenURL method?
Fourth parameter of
OpenURL()
. -
Fourth parameter of
OpenURL()
. -
That is an error message. The server was not able to answer the request. If you enter that address in your browser you should have noticed that it redirects to an ASPX page. A possible reason might be that the server script tries to access a request header which is not present and the code does not handle that. Candidates are (among others)
User-Agent, Content-Type, and Accept. But ultimately only the administrator of that web site can tell you.
I have tried something like this:
_T("Cache-Control: no-store\r\nPragma: no-cache\r\nExpires: 0\r\nContent-Type: text/html; charset=utf-8\r\nX-UA-Compatible: IE=edge,chrome=1\r\nauthor: Ministerul Finantelor Publice\r\ndescription: \r\nkeywords: \r\n\r\n")
The same result … very strange …
-
That is an error message. The server was not able to answer the request. If you enter that address in your browser you should have noticed that it redirects to an ASPX page. A possible reason might be that the server script tries to access a request header which is not present and the code does not handle that. Candidates are (among others)
User-Agent, Content-Type, and Accept. But ultimately only the administrator of that web site can tell you.
And a similar problem, where I get only the headers without the body of the page source: http://mfinante.ro/infocodfiscal.html[^]. When I tried to get the page source, here is the result:
(function(){
var securemsg; var dosl7\_common;
window.KBov=!!window.KBov;try{(function(){try{var ll,Ll,Ol=1,Sl=1,il=1,jl=1,Jl=1;for(var ZL=0;ZL<Ll;++ZL)Ol+=2,Sl+=2,il+=2,jl+=2,Jl+=3;ll=Ol+Sl+il+jl+Jl;window.JL===ll&&(window.JL=++ll)}catch(sL){window.JL=ll}var iL=!0;function lo(l){!l||document.visibilityState&&"visible"!==document.visibilityState||(iL=!1,document.cookie="brav=ad");return iL}function Lo(){}lo(window[Lo.name]===Lo);lo("function"!==typeof ie9rgb4);lo(/\x3c/.test(function(){return"\x3c"})&!/x3d/.test(function(){return"'x3'+'d';"}));
var Oo=window.attachEvent||/mobi/i.test(window["\x6e\x61vi\x67a\x74\x6f\x72"]["\x75\x73e\x72A\x67\x65\x6et"]),Io=+new Date+6E5,lO,LO,oO,zO=setTimeout,ZO=Oo?3E4:6E3;function SO(){if(!document.querySelector)return!0;var l=+new Date,z=l>Io;if(z)return lo(!1);z=LO&&!oO&&lO+ZO<l;z=lo(z);lO=l;LO||(LO=!0,zO(function(){LO=!1},1));return z}SO();
document.addEventListener&&document.addEventListener("visibilitychange",function(l){document.visibilityState&&("hidden"===document.visibilityState&&l.isTrusted?oO=!0:"visible"===document.visibilityState&&(lO=+new Date,oO=!1,SO()))});var iO=[17795081,27611931586,1558153217];function jO(l){l="string"===typeof l?l:l.toString(36);var z=window[l];if(!z.toString)return;var s=""+z;window[l]=function(l,s){LO=!1;return z(l,s)};window[l].toString=function(){return s}}for(var JO=0;JO<iO.length;++JO)jO(iO[JO]);
})();
(function(){
var securemsg; var dosl7\_common;
window["blobfp"] = "1111111110112000003e825d0550f830000004a71d70c295b19a7f2005afaa33b00001c20eac9549cdc897b01acfc003937e6e0f02eceda17300000020http://re.security.f5aas.com/re/";
</x-turndown>
-
And a similar problem, where I get only the headers without the body of the page source: http://mfinante.ro/infocodfiscal.html[^]. When I tried to get the page source, here is the result:
(function(){
var securemsg; var dosl7\_common;
window.KBov=!!window.KBov;try{(function(){try{var ll,Ll,Ol=1,Sl=1,il=1,jl=1,Jl=1;for(var ZL=0;ZL<Ll;++ZL)Ol+=2,Sl+=2,il+=2,jl+=2,Jl+=3;ll=Ol+Sl+il+jl+Jl;window.JL===ll&&(window.JL=++ll)}catch(sL){window.JL=ll}var iL=!0;function lo(l){!l||document.visibilityState&&"visible"!==document.visibilityState||(iL=!1,document.cookie="brav=ad");return iL}function Lo(){}lo(window[Lo.name]===Lo);lo("function"!==typeof ie9rgb4);lo(/\x3c/.test(function(){return"\x3c"})&!/x3d/.test(function(){return"'x3'+'d';"}));
var Oo=window.attachEvent||/mobi/i.test(window["\x6e\x61vi\x67a\x74\x6f\x72"]["\x75\x73e\x72A\x67\x65\x6et"]),Io=+new Date+6E5,lO,LO,oO,zO=setTimeout,ZO=Oo?3E4:6E3;function SO(){if(!document.querySelector)return!0;var l=+new Date,z=l>Io;if(z)return lo(!1);z=LO&&!oO&&lO+ZO<l;z=lo(z);lO=l;LO||(LO=!0,zO(function(){LO=!1},1));return z}SO();
document.addEventListener&&document.addEventListener("visibilitychange",function(l){document.visibilityState&&("hidden"===document.visibilityState&&l.isTrusted?oO=!0:"visible"===document.visibilityState&&(lO=+new Date,oO=!1,SO()))});var iO=[17795081,27611931586,1558153217];function jO(l){l="string"===typeof l?l:l.toString(36);var z=window[l];if(!z.toString)return;var s=""+z;window[l]=function(l,s){LO=!1;return z(l,s)};window[l].toString=function(){return s}}for(var JO=0;JO<iO.length;++JO)jO(iO[JO]);
})();
(function(){
var securemsg; var dosl7\_common;
window["blobfp"] = "1111111110112000003e825d0550f830000004a71d70c295b19a7f2005afaa33b00001c20eac9549cdc897b01acfc003937e6e0f02eceda17300000020http://re.security.f5aas.com/re/";
</x-turndown>
It is a valid reply, and the important parts are the script tags, which are executed by web browsers to show the final content. If JavaScript is disabled in a browser, the content of the noscript tag is shown: Please enable JavaScript to view the page content.
That is the problem with today's web sites: they don't use plain HTML anymore. Even well-known tools like wget don't support JavaScript and therefore can't be used to download the "visible" content (which may vary with attributes like media type and screen resolution). You would have to use a client that is able to do everything a web browser can do apart from the final rendering.
-
It is a valid reply, and the important parts are the script tags, which are executed by web browsers to show the final content. If JavaScript is disabled in a browser, the content of the noscript tag is shown: Please enable JavaScript to view the page content.
That is the problem with today's web sites: they don't use plain HTML anymore. Even well-known tools like wget don't support JavaScript and therefore can't be used to download the "visible" content (which may vary with attributes like media type and screen resolution). You would have to use a client that is able to do everything a web browser can do apart from the final rendering.
Thank you Jochen. I had tried to create an html client with CHtmlView, and here is the result:
void CTestHTMLView::OnInitialUpdate()
{
CHtmlView::OnInitialUpdate();Navigate2(\_T("http://www.mfinante.ro/pagina.html"), NULL, NULL);
}
The result is a blank page … is it impossible to solve this programmatically in this case?
-
Thank you Jochen. I had tried to create an html client with CHtmlView, and here is the result:
void CTestHTMLView::OnInitialUpdate()
{
CHtmlView::OnInitialUpdate();Navigate2(\_T("http://www.mfinante.ro/pagina.html"), NULL, NULL);
}
The result is a blank page … is it impossible to solve this programmatically in this case?
Use the CHtmlView GetSource() method to inspect the source. It will probably look like those you have already got and should contain hints about what is missing. Note also that the IE settings are used, which might be too restrictive. -
Use the CHtmlView GetSource() method to inspect the source. It will probably look like those you have already got and should contain hints about what is missing. Note also that the IE settings are used, which might be too restrictive.
I had tried to use CHtmlView::GetSource(), but the string was exactly like the one from the CInternetSession::OpenURL() method … no difference between them …
void CTestHTMLView::OnHelpGetsource()
{
// TODO: Add your command handler code hereCString s; GetSource(s);
}
and the result is:
(function(){
var securemsg;
var dosl7_common;window["bobcmn"] = "1111101111101020000000220000000520000000025705570d200000096300000000300000000300000006/TSPD/300000008TSPD_101300000004http200000000200000000";
window.SqNs=!!window.SqNs;try{(function(){try{var ss,Ss,is=1,Js=1,Ls=1,zs=1,Zs=1;for(var jS=0;jS<Ss;++jS)is+=2,Js+=2,Ls+=2,zs+=2,Zs+=3;ss=is+Js+Ls+zs+Zs;window.lJ===ss&&(window.lJ=++ss)}catch(JS){window.lJ=ss}var oS=!0;function ZS(s){!s||document.visibilityState&&"visible"!==document.visibilityState||(oS=!1,document.cookie="brav=ad");return oS}function s_(){}ZS(window[s_.name]===s_);ZS("function"!==typeof ie9rgb4);ZS(/\x3c/.test(function(){return"\x3c"})&!/x3d/.test(function(){return"'x3'+'d';"}));
var __=window.attachEvent||/mobi/i.test(window["\x6e\x61vi\x67a\x74\x6f\x72"]["\x75\x73e\x72A\x67\x65\x6et"]),o_=+new Date+6E5,Z_,si,Si,ii=setTimeout,Ii=__?3E4:6E3;function ji(){if(!document.querySelector)return!0;var s=+new Date,I=s>o_;if(I)return ZS(!1);I=si&&!Si&&Z_+Ii<s;I=ZS(I);Z_=s;si||(si=!0,ii(function(){si=!1},1));return I}ji();
document.addEventListener&&document.addEventListener("visibilitychange",function(s){document.visibilityState&&("hidden"===document.visibilityState&&s.isTrusted?Si=!0:"visible"===document.visibilityState&&(Z_=+new Date,Si=!1,ji()))});var li=[17795081,27611931586,1558153217];function Li(s){s="string"===typeof s?s:s.toString(36);var I=window[s];if(!I.toString)return;var l=""+I;window[s]=function(s,l){si=!1;return I(s,l)};window[s].toString=function(){return l}}for(var oi=0;oi<li.length;++oi)Li(li[oi]);
ZS(!1!==window.SqNs);
(function(){var s=-1,s={o:++s,oL:"false"[s],S:++s,iI:"false"[s],jS:++s,_5:"[object Object]"[s],Ij:(s[s]+"")[s],jI:++s,ij:"true"[s],_S:++s,LS:++s,OL:"[object Object]"[s],L:++s,sS:++s,ljS:++s,JjS:++s};try{s._I=(s._I=s+"")[s.LS]+(s.Oi=s._I[s.S])+(s.LL=(s.oi+"")[s.S])+(!s+"")[s.jI]+(s.zi=s._</x-turndown> -
I had tried to use CHtmlView::GetSource(), but the string was exactly like the one from CInternetSession::OpenURL() method … no difference between them …
void CTestHTMLView::OnHelpGetsource()
{
// TODO: Add your command handler code hereCString s; GetSource(s);
}
and the result is:
(function(){
var securemsg;
var dosl7_common;window["bobcmn"] = "1111101111101020000000220000000520000000025705570d200000096300000000300000000300000006/TSPD/300000008TSPD_101300000004http200000000200000000";
window.SqNs=!!window.SqNs;try{(function(){try{var ss,Ss,is=1,Js=1,Ls=1,zs=1,Zs=1;for(var jS=0;jS<Ss;++jS)is+=2,Js+=2,Ls+=2,zs+=2,Zs+=3;ss=is+Js+Ls+zs+Zs;window.lJ===ss&&(window.lJ=++ss)}catch(JS){window.lJ=ss}var oS=!0;function ZS(s){!s||document.visibilityState&&"visible"!==document.visibilityState||(oS=!1,document.cookie="brav=ad");return oS}function s_(){}ZS(window[s_.name]===s_);ZS("function"!==typeof ie9rgb4);ZS(/\x3c/.test(function(){return"\x3c"})&!/x3d/.test(function(){return"'x3'+'d';"}));
var __=window.attachEvent||/mobi/i.test(window["\x6e\x61vi\x67a\x74\x6f\x72"]["\x75\x73e\x72A\x67\x65\x6et"]),o_=+new Date+6E5,Z_,si,Si,ii=setTimeout,Ii=__?3E4:6E3;function ji(){if(!document.querySelector)return!0;var s=+new Date,I=s>o_;if(I)return ZS(!1);I=si&&!Si&&Z_+Ii<s;I=ZS(I);Z_=s;si||(si=!0,ii(function(){si=!1},1));return I}ji();
document.addEventListener&&document.addEventListener("visibilitychange",function(s){document.visibilityState&&("hidden"===document.visibilityState&&s.isTrusted?Si=!0:"visible"===document.visibilityState&&(Z_=+new Date,Si=!1,ji()))});var li=[17795081,27611931586,1558153217];function Li(s){s="string"===typeof s?s:s.toString(36);var I=window[s];if(!I.toString)return;var l=""+I;window[s]=function(s,l){si=!1;return I(s,l)};window[s].toString=function(){return l}}for(var oi=0;oi<li.length;++oi)Li(li[oi]);
ZS(!1!==window.SqNs);
(function(){var s=-1,s={o:++s,oL:"false"[s],S:++s,iI:"false"[s],jS:++s,_5:"[object Object]"[s],Ij:(s[s]+"")[s],jI:++s,ij:"true"[s],_S:++s,LS:++s,OL:"[object Object]"[s],L:++s,sS:++s,ljS:++s,JjS:++s};try{s._I=(s._I=s+"")[s.LS]+(s.Oi=s._I[s.S])+(s.LL=(s.oi+"")[s.S])+(!s+"")[s.jI]+(s.zi=s._</x-turndown>Like before: All output is done by linked JavaScript. If that is not shown you have to change the IE settings which probably block the execution. But I don't know if
CHtmlView
supports all IE features. -
I had tried to use CHtmlView::GetSource(), but the string was exactly like the one from CInternetSession::OpenURL() method … no difference between them …
void CTestHTMLView::OnHelpGetsource()
{
// TODO: Add your command handler code hereCString s; GetSource(s);
}
and the result is:
(function(){
var securemsg;
var dosl7_common;window["bobcmn"] = "1111101111101020000000220000000520000000025705570d200000096300000000300000000300000006/TSPD/300000008TSPD_101300000004http200000000200000000";
window.SqNs=!!window.SqNs;try{(function(){try{var ss,Ss,is=1,Js=1,Ls=1,zs=1,Zs=1;for(var jS=0;jS<Ss;++jS)is+=2,Js+=2,Ls+=2,zs+=2,Zs+=3;ss=is+Js+Ls+zs+Zs;window.lJ===ss&&(window.lJ=++ss)}catch(JS){window.lJ=ss}var oS=!0;function ZS(s){!s||document.visibilityState&&"visible"!==document.visibilityState||(oS=!1,document.cookie="brav=ad");return oS}function s_(){}ZS(window[s_.name]===s_);ZS("function"!==typeof ie9rgb4);ZS(/\x3c/.test(function(){return"\x3c"})&!/x3d/.test(function(){return"'x3'+'d';"}));
var __=window.attachEvent||/mobi/i.test(window["\x6e\x61vi\x67a\x74\x6f\x72"]["\x75\x73e\x72A\x67\x65\x6et"]),o_=+new Date+6E5,Z_,si,Si,ii=setTimeout,Ii=__?3E4:6E3;function ji(){if(!document.querySelector)return!0;var s=+new Date,I=s>o_;if(I)return ZS(!1);I=si&&!Si&&Z_+Ii<s;I=ZS(I);Z_=s;si||(si=!0,ii(function(){si=!1},1));return I}ji();
document.addEventListener&&document.addEventListener("visibilitychange",function(s){document.visibilityState&&("hidden"===document.visibilityState&&s.isTrusted?Si=!0:"visible"===document.visibilityState&&(Z_=+new Date,Si=!1,ji()))});var li=[17795081,27611931586,1558153217];function Li(s){s="string"===typeof s?s:s.toString(36);var I=window[s];if(!I.toString)return;var l=""+I;window[s]=function(s,l){si=!1;return I(s,l)};window[s].toString=function(){return l}}for(var oi=0;oi<li.length;++oi)Li(li[oi]);
ZS(!1!==window.SqNs);
(function(){var s=-1,s={o:++s,oL:"false"[s],S:++s,iI:"false"[s],jS:++s,_5:"[object Object]"[s],Ij:(s[s]+"")[s],jI:++s,ij:"true"[s],_S:++s,LS:++s,OL:"[object Object]"[s],L:++s,sS:++s,ljS:++s,JjS:++s};try{s._I=(s._I=s+"")[s.LS]+(s.Oi=s._I[s.S])+(s.LL=(s.oi+"")[s.S])+(!s+"")[s.jI]+(s.zi=s._</x-turndown>_Flaviu wrote:
I really don't know how to overcome this
The javascript and/or webassembly needs to execute. Websites are not flat files anymore. You will need to use a complete browser engine to parse the DOM. In other words most websites today are generating dynamic content via javascript. You need to think outside the box here... that javascript you see needs to execute in order to generate the page. One quick way to do this would be using a hidden Internet Explorer window as the backend. Creating a Web Browser-Style MFC Application[^]. You could set the CHtmlView to load the site and dump the top document after javascript has modified the DOM. Some of my tools are using a custom webkit[^] as the backend to do this. You can also use Chromium Embedded[^]. You could probably spend less than a day modifying cefsimple[^] to load the website and dump the top document to file after javascript has executed. Good Luck. Best Wishes, -David Delaune