There are many gotos, but these ones are mine
-
Gotos are frowned on. You should not use gotos. Long live gotos. Until someone comes up with a better, faster, or more concise way of expressing the following DFA state machine (presented in part), I will continue to defend the use of gotos, even if their use cases have become significantly narrower as progress has marched on. When you need them, there is no better tool.
internal sealed partial class JsonStringRunner : FAStringRunner {
private FAMatch NextMatchImpl(string s) {
int ch;
int len;
int p;
int l;
int c;
ch = -1;
len = 0;
if ((this.position == -1)) {
this.position = 0;
}
p = this.position;
l = this.line;
c = this.column;
this.Advance(s, ref ch, ref len, true);
// q0:
// [\t-\n\r ]
if (((((ch >= 9)
&& (ch <= 10))
|| (ch == 13))
|| (ch == 32))) {
this.Advance(s, ref ch, ref len, false);
goto q1;
}
// [\"]
if ((ch == 34)) {
this.Advance(s, ref ch, ref len, false);
goto q2;
}
// [,]
if ((ch == 44)) {
this.Advance(s, ref ch, ref len, false);
goto q9;
}
// [\-]
if ((ch == 45)) {
this.Advance(s, ref ch, ref len, false);
goto q10;
}
// [0]
if ((ch == 48)) {
this.Advance(s, ref ch, ref len, false);
goto q11;
}
// [1-9]
if (((ch >= 49)
&& (ch <= 57))) {
this.Advance(s, ref ch, ref len, false);
goto q17;
}
// [\:]
if ((ch == 58)) {
this.Advance(s, ref ch, ref len, false);
goto q18;
}
// [\[]
if ((ch == 91)) {
this.Advance(s, ref ch, ref len, false);
goto q19;
}
// [\]]
if ((ch == 93)) {
this.Advance(s, ref ch, ref len, false);
goto q20;
}
// [f]
if ((ch == 102)) {
this.Advance(s, ref ch, ref len, false);
Try writing Assembly without them (the fabled JMP!). They are a tool that gets misused (kinda like the powered screwdriver).
-
Gotos are frowned on. You should not use gotos. Long live gotos. Until someone comes up with a better, faster, or more concise way of expressing the following DFA state machine (presented in part), I will continue to defend the use of gotos, even if their use cases have become significantly narrower as progress has marched on. When you need them, there is no better tool.
internal sealed partial class JsonStringRunner : FAStringRunner {
private FAMatch NextMatchImpl(string s) {
int ch;
int len;
int p;
int l;
int c;
ch = -1;
len = 0;
if ((this.position == -1)) {
this.position = 0;
}
p = this.position;
l = this.line;
c = this.column;
this.Advance(s, ref ch, ref len, true);
// q0:
// [\t-\n\r ]
if (((((ch >= 9)
&& (ch <= 10))
|| (ch == 13))
|| (ch == 32))) {
this.Advance(s, ref ch, ref len, false);
goto q1;
}
// [\"]
if ((ch == 34)) {
this.Advance(s, ref ch, ref len, false);
goto q2;
}
// [,]
if ((ch == 44)) {
this.Advance(s, ref ch, ref len, false);
goto q9;
}
// [\-]
if ((ch == 45)) {
this.Advance(s, ref ch, ref len, false);
goto q10;
}
// [0]
if ((ch == 48)) {
this.Advance(s, ref ch, ref len, false);
goto q11;
}
// [1-9]
if (((ch >= 49)
&& (ch <= 57))) {
this.Advance(s, ref ch, ref len, false);
goto q17;
}
// [\:]
if ((ch == 58)) {
this.Advance(s, ref ch, ref len, false);
goto q18;
}
// [\[]
if ((ch == 91)) {
this.Advance(s, ref ch, ref len, false);
goto q19;
}
// [\]]
if ((ch == 93)) {
this.Advance(s, ref ch, ref len, false);
goto q20;
}
// [f]
if ((ch == 102)) {
this.Advance(s, ref ch, ref len, false);
Code runs in LinqPad. Code runs in LinqPad. This should be significantly faster than your original code because it speeds up the conditionals by using pattern matching instead of overloadable operators. Also, the local functions can be inlined, meaning they will be executed in place, which is even more efficient than the `goto` statements. And now it's not pure spaghetti.
string json = """ { "test": 0, "data": "value" } """; JsonStringRunner runner = new(); List matches = new(); FAMatch current = default; Stopwatch sw = new(); sw.Start(); do{ current = runner.GetMatch(json); matches.Add(current); } while(!runner.isDone); sw.Stop(); matches.Dump(); sw.Dump(); internal record struct FAMatch(int token, string match, int position, int length, int column) { internal static FAMatch Create(int token, string match, int position, int length, int column) => new(token, match, position, length, column); } internal abstract class FAStringRunner { protected int position = -1, line = 0, column = 0; internal bool isDone = false; } internal sealed partial class JsonStringRunner : FAStringRunner { private void Advance(string s, ref int ch, ref int len, bool flag) { // Assuming Advance takes consecutive characters in the string. ch = s\[position\]; position++; len++; isDone = !(position < s.Length); } private FAMatch NextMatchImpl(string s) { int ch; int len; int l; int c; ch = -1; len = 0; if ((this.position is -1)) { this.position = 0; } int p = this.position; l = this.line; c = this.column; this.Advance(s, ref ch, ref len, true); // q0: switch (ch) { // \[\\t-\\n\\r \] case 9 or 10 or 13 or 32: if(ch is 10 or 13){ l = line++; } return q1(); // \[\\"\] case 34: return q2(); // \[,\] case 44: return q9(); // \[\\-\] case
-
trønderen wrote:
If I were given the responsibility for a state machine implementation like that, I would immediately run to my boss asking for permission to rewrite the whole thing as a table driven machine.
... or as a state machine that returns function pointers instead of using tables and state variables:
#include #include // Fn ptrs defs
typedef void (*RT)( int input );
typedef RT (*TER)( int input );// Forward declarations
extern TER state1( int input );
extern TER state2( int input );
extern TER state3( int input );// First state
TER state1( int input )
{
printf( "one\t" );
return input < 10 ? (TER)&state2 : (TER)NULL;
}// Second state
TER state2( int input )
{
printf( "two\t" );
return (TER)&state3;
}// Third state
TER state3( int input )
{
printf( "three\t" );
return (TER)&state1;
}int main(int argc, char* argv[])
{
int n;// Set Start state TER state = (TER)&state1; // Exercises the state machine. Ends when state == NULL for ( n = 0 ; state ; ++n ) { // Executes the current state (state variable) then goes to the next state state = (TER)( state( n ) ); } printf( "\\n\\nPress any key\\n" ); getch(); return 0;
}
Type casts are useful because in C it's impossible to declare function pointers that return function pointers that return function pointers that return function pointers... :) Regards
I hate function pointer dispatch code in general. Because at some point you'll have to debug and maintain it, and you end up with impossible to follow pointer arrays hiding the flow of your app.
Check out my IoT graphics library here: https://honeythecodewitch.com/gfx And my IoT UI/User Experience library here: https://honeythecodewitch.com/uix
-
Code runs in LinqPad. Code runs in LinqPad. This should be significantly faster than your original code because it speeds up the conditionals by using pattern matching instead of overloadable operators. Also, the local functions can be inlined, meaning they will be executed in place, which is even more efficient than the `goto` statements. And now it's not pure spaghetti.
string json = """ { "test": 0, "data": "value" } """; JsonStringRunner runner = new(); List matches = new(); FAMatch current = default; Stopwatch sw = new(); sw.Start(); do{ current = runner.GetMatch(json); matches.Add(current); } while(!runner.isDone); sw.Stop(); matches.Dump(); sw.Dump(); internal record struct FAMatch(int token, string match, int position, int length, int column) { internal static FAMatch Create(int token, string match, int position, int length, int column) => new(token, match, position, length, column); } internal abstract class FAStringRunner { protected int position = -1, line = 0, column = 0; internal bool isDone = false; } internal sealed partial class JsonStringRunner : FAStringRunner { private void Advance(string s, ref int ch, ref int len, bool flag) { // Assuming Advance takes consecutive characters in the string. ch = s\[position\]; position++; len++; isDone = !(position < s.Length); } private FAMatch NextMatchImpl(string s) { int ch; int len; int l; int c; ch = -1; len = 0; if ((this.position is -1)) { this.position = 0; } int p = this.position; l = this.line; c = this.column; this.Advance(s, ref ch, ref len, true); // q0: switch (ch) { // \[\\t-\\n\\r \] case 9 or 10 or 13 or 32: if(ch is 10 or 13){ l = line++; } return q1(); // \[\\"\] case 34: return q2(); // \[,\] case 44: return q9(); // \[\\-\] case
I'll have to try a variation of this, but what you produced won't function due to the returns. How are you going to loop?
Check out my IoT graphics library here: https://honeythecodewitch.com/gfx And my IoT UI/User Experience library here: https://honeythecodewitch.com/uix
-
I'll have to try a variation of this, but what you produced won't function due to the returns. How are you going to loop?
Check out my IoT graphics library here: https://honeythecodewitch.com/gfx And my IoT UI/User Experience library here: https://honeythecodewitch.com/uix
Without the full code I didn't know what the logic inside of the various labelled location did, so I simply returned the current substring as a FAMatch. Your method dumps out as an FAMatch so I defaulted to that behavior. The point is that inlined local methods are going to be just as fast as gotos and the pattern matching is much more efficient.
-
If I were given the responsibility for a state machine implementation like that, I would immediately run to my boss asking for permission to rewrite the whole thing as a table driven machine. There is no way, with code like that, that I could guarantee that all inputs/events are properly handled in all cases (or given the proper error treatment). I would have to make a huge effort if I were to report a complete set of normal (non-error) ways to go from a given state to another, and which inputs/events would lead to which error states. I've never written a CP article, but code like this makes my fingers itch to compose an article about proper table driven state machine implementation! Maybe some day I will get around to doing it :-)
Religious freedom is the freedom to say that two plus two make five.
I'm not sure why it wouldn't be pretty straightforward to write a [TestCase()] for each of the branches? I don't think this code is very cyclomatically complex? But yeah, when you say table driven state machine I'm pretty sure that's where my head is too, if you're basically talking about a direct map of the case statements to data.
-
I hate function pointer dispatch code in general. Because at some point you'll have to debug and maintain it, and you end up with impossible to follow pointer arrays hiding the flow of your app.
Check out my IoT graphics library here: https://honeythecodewitch.com/gfx And my IoT UI/User Experience library here: https://honeythecodewitch.com/uix
-
honey the codewitch wrote:
with impossible to follow pointer arrays
There are no pointer arrays in my code.
Sorry, I was speaking generally about dispatch function pointers. Your statement just reminded me of it. Sorry I wasn't clear. I just woke up when I wrote that. :)
Check out my IoT graphics library here: https://honeythecodewitch.com/gfx And my IoT UI/User Experience library here: https://honeythecodewitch.com/uix
-
Sorry, I was speaking generally about dispatch function pointers. Your statement just reminded me of it. Sorry I wasn't clear. I just woke up when I wrote that. :)
Check out my IoT graphics library here: https://honeythecodewitch.com/gfx And my IoT UI/User Experience library here: https://honeythecodewitch.com/uix
-
Without the full code I didn't know what the logic inside of the various labelled location did, so I simply returned the current substring as a FAMatch. Your method dumps out as an FAMatch so I defaulted to that behavior. The point is that inlined local methods are going to be just as fast as gotos and the pattern matching is much more efficient.
Sure, I understand. I did say it was a DFA state machine implementation but unless you're a total FA nerd like I am that probably doesn't mean anything. :) I'm very curious about the inlined local method and pattern matching approach, particularly the IL it generates, because I don't understand how it would be faster than the IL my code produces - particularly my direct compiler which can short circuit the if tests because the comparisons are in sorted order.
Check out my IoT graphics library here: https://honeythecodewitch.com/gfx And my IoT UI/User Experience library here: https://honeythecodewitch.com/uix
-
I'm not sure why it wouldn't be pretty straightforward to write a [TestCase()] for each of the branches? I don't think this code is very cyclomatically complex? But yeah, when you say table driven state machine I'm pretty sure that's where my head is too, if you're basically talking about a direct map of the case statements to data.
There is one issue with that. The compiled ones can be augmented in a way that the table driven ones cannot. For example, I wrote an embedded JSON pull parser in C++. I used compiled DFA code, and then I parsed floats, ints, and bools out of the stream *as* I was lexing, making the API both easier to use and marginally more performant because you didn't have to get the string back and then reexamine it in order to call atoi() or whatever. It was a simple little surgery on the generated code, with excellent results. I admit this isn't the most common case out there, but I have used this technique several times. Edited to add: It's also easier in practice to debug and step through a generated lexer than it is a table driven lexer. And with my Visual FA project, it produces images of directed graphs that map one to one to the labels/jump points in the code.
q0:
maps to the state q0 in the graph. It makes it really easy to see what it's doing, in terms of documenting it.
Check out my IoT graphics library here: https://honeythecodewitch.com/gfx And my IoT UI/User Experience library here: https://honeythecodewitch.com/uix
-
Most developers know `goto` statements are "bad" but very few know why or have even read Dijkstra's letter in CACM. And I'm willing to bet most developers haven't heard of the ACM. :(
`goto` statements that target entry into a block (as you could do in older versions of Fortran and Basic) are frowned upon because they make automated program verification impossible - aka "I can't say with certainty how you got here". Well behaved `goto` statements are not only fine, you couldn't write code without them. To make it harder for novice programmers to misuse the `goto` statement, many languages such as C, C++, Java and C# (and many others) have created statements that implement well behaved `goto`'s. They are:
- `break` - goto the end of a `switch` or terminate the closest enclosing iteration statement
- `continue` - start a new iteration of the closest enclosing iteration statement
- `return` - exit the function in which it appears and return to the caller
And most (I suspect all) modern compilers won't allow specifying the target of a `goto` into another block. So use `goto`'s, but use them the way nature intended. :)
/ravi
My new year resolution: 2048 x 1536 Home | Articles | My .NET bits | Freeware ravib(at)ravib(dot)com
Break, continue, and return are basically gotos when translated to low-level machine code :thumbsup: The same goes for for-loops, if-else, while-do, switch, etc. Gotos are frowned upon because some people used them badly. Maybe they caused an infinite loop or something. Maybe they forgot to free the allocated memory. Also, goto shouldn't be used when your high-level language provides the more explanatory keywords above. The reason is, obviously, maintainability and readability.
-
I hate function pointer dispatch code in general. Because at some point you'll have to debug and maintain it, and you end up with impossible to follow pointer arrays hiding the flow of your app.
Check out my IoT graphics library here: https://honeythecodewitch.com/gfx And my IoT UI/User Experience library here: https://honeythecodewitch.com/uix
honey the codewitch wrote:
I hate function pointer dispatch code in general.
Do you refuse to use delegates at all, or don't you consider those to be function pointers? (In other words: Are function pointers OK as long as they are called delegates?) No, when you have generated your code, you do not "at some time have to debug and maintain" the generated code. You debug and maintain your source, not the compilation result. Not even if you can, sort of, read it. Executable binaries can also be disassembled into "readable" code - the readability is no argument for random peek and poke. You send your code through a generator/compiler, and want to patch up the compiled result ("The compiled ones can be augmented in a way that the table driven ones cannot"), or complain about the instructions generated by the compiler - I haven't heard anyone saying any such thing in earnest for a decade or two. Some people still believe that they can do smarter heap management than the standard heap manager, rejecting automated garbage collection and smart pointers, but for the most part, compilers became smarter than human coders in the last millennium. You will see a lot of function pointer dispatch code in the generated code from a plain C++ compiler. Do you hate that as well? If you accept it from a C++ compiler, why do you have problems accepting it from other compilers? (The first C++ compiler I used didn't produce binary code - it was a machine independent compiler producing K&R C to be fed into a machine specific compiler. So we had full access to the C code for patching it up before passing it on to cc. We did not. I would not do it with any generated code, whether the compiler is called C++ or Visual FA.)
Religious freedom is the freedom to say that two plus two make five.
-
honey the codewitch wrote:
I hate function pointer dispatch code in general.
Do you refuse to use delegates at all, or don't you consider those to be function pointers? (In other words: Are function pointers OK as long as they are called delegates?) No, when you have generated your code, you do not "at some time have to debug and maintain" the generated code. You debug and maintain your source, not the compilation result. Not even if you can, sort of, read it. Executable binaries can also be disassembled into "readable" code - the readability is no argument for random peek and poke. You send your code through a generator/compiler, and want to patch up the compiled result ("The compiled ones can be augmented in a way that the table driven ones cannot"), or complain about the instructions generated by the compiler - I haven't heard anyone saying any such thing in earnest for a decade or two. Some people still believe that they can do smarter heap management than the standard heap manager, rejecting automated garbage collection and smart pointers, but for the most part, compilers became smarter than human coders in the last millennium. You will see a lot of function pointer dispatch code in the generated code from a plain C++ compiler. Do you hate that as well? If you accept it from a C++ compiler, why do you have problems accepting it from other compilers? (The first C++ compiler I used didn't produce binary code - it was a machine independent compiler producing K&R C to be fed into a machine specific compiler. So we had full access to the C code for patching it up before passing it on to cc. We did not. I would not do it with any generated code, whether the compiler is called C++ or Visual FA.)
Religious freedom is the freedom to say that two plus two make five.
I was going to respond, but I think I answered all this in the post you responded to
Because at some point you'll have to debug and maintain it, and you end up with impossible to follow pointer arrays hiding the flow of your app.
Check out my IoT graphics library here: https://honeythecodewitch.com/gfx And my IoT UI/User Experience library here: https://honeythecodewitch.com/uix
-
I was going to respond, but I think I answered all this in the post you responded to
Because at some point you'll have to debug and maintain it, and you end up with impossible to follow pointer arrays hiding the flow of your app.
Check out my IoT graphics library here: https://honeythecodewitch.com/gfx And my IoT UI/User Experience library here: https://honeythecodewitch.com/uix
The real issue is:
honey the codewitch wrote:
at some point you'll have to debug and maintain it
Does that apply to the code generated by your C, C++ or C# compiler as well? When are you going to start trusting your tools to do at least as good a job as the one you are doing yourself? I think: If you don't trust your tools to do a good enough job, throw them away and do the job yourself!
Religious freedom is the freedom to say that two plus two make five.
-
The real issue is:
honey the codewitch wrote:
at some point you'll have to debug and maintain it
Does that apply to the code generated by your C, C++ or C# compiler as well? When are you going to start trusting your tools to do at least as good a job as the one you are doing yourself? I think: If you don't trust your tools to do a good enough job, throw them away and do the job yourself!
Religious freedom is the freedom to say that two plus two make five.
It does not typically apply to generated code because the maintenance of that is moved to the generated code's input specification - in other words, whatever document or resource it uses to generate the code from. THAT is what needs to be maintained. It does not apply to compiled code either, for exactly the same reason (the compiler being yet another code generator)
Check out my IoT graphics library here: https://honeythecodewitch.com/gfx And my IoT UI/User Experience library here: https://honeythecodewitch.com/uix
-
Gotos are frowned on. You should not use gotos. Long live gotos. Until someone comes up with a better, faster, or more concise way of expressing the following DFA state machine (presented in part), I will continue to defend the use of gotos, even if their use cases have become significantly narrower as progress has marched on. When you need them, there is no better tool.
internal sealed partial class JsonStringRunner : FAStringRunner {
private FAMatch NextMatchImpl(string s) {
int ch;
int len;
int p;
int l;
int c;
ch = -1;
len = 0;
if ((this.position == -1)) {
this.position = 0;
}
p = this.position;
l = this.line;
c = this.column;
this.Advance(s, ref ch, ref len, true);
// q0:
// [\t-\n\r ]
if (((((ch >= 9)
&& (ch <= 10))
|| (ch == 13))
|| (ch == 32))) {
this.Advance(s, ref ch, ref len, false);
goto q1;
}
// [\"]
if ((ch == 34)) {
this.Advance(s, ref ch, ref len, false);
goto q2;
}
// [,]
if ((ch == 44)) {
this.Advance(s, ref ch, ref len, false);
goto q9;
}
// [\-]
if ((ch == 45)) {
this.Advance(s, ref ch, ref len, false);
goto q10;
}
// [0]
if ((ch == 48)) {
this.Advance(s, ref ch, ref len, false);
goto q11;
}
// [1-9]
if (((ch >= 49)
&& (ch <= 57))) {
this.Advance(s, ref ch, ref len, false);
goto q17;
}
// [\:]
if ((ch == 58)) {
this.Advance(s, ref ch, ref len, false);
goto q18;
}
// [\[]
if ((ch == 91)) {
this.Advance(s, ref ch, ref len, false);
goto q19;
}
// [\]]
if ((ch == 93)) {
this.Advance(s, ref ch, ref len, false);
goto q20;
}
// [f]
if ((ch == 102)) {
this.Advance(s, ref ch, ref len, false);
Seems likely that would be faster with an array lookup versus those sequential ifs.
if (match[ch])
...
honey the codewitch wrote:
if (((((ch >= 9) && (ch <= 10)) || (ch == 13)) || (ch == 32))) {
Seems unlikely that that would be better than
((ch == 9) || (ch == 10) || (ch == 13) || (ch == 32))
-
Seems likely that would be faster with an array lookup versus those sequential ifs.
if (match[ch])
...
honey the codewitch wrote:
if (((((ch >= 9) && (ch <= 10)) || (ch == 13)) || (ch == 32))) {
Seems unlikely that that would be better than
((ch == 9) || (ch == 10) || (ch == 13) || (ch == 32))
What's funny is my table driven code does exactly that. Sometimes I get different results depending on the lexer complexity, but for simple lexers at least the compiled versions run slightly faster. With large lexers the table method starts to outstrip it. I should note, the lexer size has nothing to do with the number of comparisons in those ifs - but rather, in essence, the number of ifs - really the number of goto labels.
Check out my IoT graphics library here: https://honeythecodewitch.com/gfx And my IoT UI/User Experience library here: https://honeythecodewitch.com/uix
-
It does not typically apply to generated code because the maintenance of that is moved to the generated code's input specification - in other words, whatever document or resource it uses to generate the code from. THAT is what needs to be maintained. It does not apply to compiled code either, for exactly the same reason (the compiler being yet another code generator)
Check out my IoT graphics library here: https://honeythecodewitch.com/gfx And my IoT UI/User Experience library here: https://honeythecodewitch.com/uix
But if the code is generated by Visual FA rather than cc, then you will do peek and poke on the generated code. Well, that is choice. I think you are on the wrong track. In the 1980s, I worked in a company distributing OS patches as Poke instructions. I wouldn't condone that practice today.
Religious freedom is the freedom to say that two plus two make five.
-
But if the code is generated by Visual FA rather than cc, then you will do peek and poke on the generated code. Well, that is choice. I think you are on the wrong track. In the 1980s, I worked in a company distributing OS patches as Poke instructions. I wouldn't condone that practice today.
Religious freedom is the freedom to say that two plus two make five.
then you will do peek and poke on the generated code.
I will? That's news to me. Hell, with VisualFA.SourceGenerator you don't even see the generated code. It's hidden by visual studio.
Check out my IoT graphics library here: https://honeythecodewitch.com/gfx And my IoT UI/User Experience library here: https://honeythecodewitch.com/uix