Skip to content

Commit 1488db5

Browse files
committed
tried to migrate C code automatically
1 parent d286bf0 commit 1488db5

File tree

3 files changed

+152
-3
lines changed

3 files changed

+152
-3
lines changed

poc/grammar/ctok.js

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
/**
 * Minimal C source tokenizer used by the grammar-migration PoC.
 *
 * Splits `str` into an ordered list of [state, text] pairs, where state is
 * one of the tokenizer.STATE_* constants: runs of whitespace, identifier /
 * number characters, string and character literals (escapes preserved),
 * `//` and `/* ... *``/` comments, preprocessor directives (`#...` to end of
 * line), and single STATE_OTHER ("any") characters for everything else.
 *
 * @param {string} str - C source text
 * @returns {Array<[string, string]>} list of [state, token] pairs
 */
const tokenizer = function(str) {
    const tokens = [];
    let state = 0;
    let token = null;

    // Append to the pending token while the state is unchanged, otherwise
    // emit the finished token and start a new one. Calling flush() with no
    // arguments force-emits whatever is pending.
    const flush = function(nextState, init) {
        if (state === nextState) {
            token += init;
        } else {
            if (token) {
                tokens.push([state, token]);
            }
            state = nextState;
            token = init;
        }
    };

    for (let i = 0; i < str.length; i++) {
        const char = str[i];

        // Whitespace run.
        if (char === ' ' || char === '\t' || char === '\r' || char === '\n') {
            if (state === tokenizer.STATE_WS) {
                token += char;
            } else {
                flush(tokenizer.STATE_WS, char);
            }
            continue;
        }

        // String or character literal; the two cases only differ in the
        // terminating quote, so they share one branch.
        if (char === '"' || char === '\'') {
            flush(tokenizer.STATE_TXT, char);
            // `i + 1 < str.length` keeps every `str[++i]` read in bounds so
            // an unterminated literal never appends "undefined".
            while (i + 1 < str.length) {
                if (str[++i] === '\\') {
                    // Keep the backslash and the escaped character together.
                    // BUGFIX: the terminator check must NOT run here — the
                    // original broke out on escaped quotes (e.g. "a\"b").
                    i++;
                    token += '\\' + (str[i] || '');
                } else {
                    token += str[i];
                    if (str[i] === char) break;
                }
            }
            flush();
            continue;
        }

        const ch = str.charCodeAt(i);
        // Identifier/number characters: [a-zA-Z0-9_] plus anything non-ASCII.
        if (
            (ch > 96 && ch < 123) ||
            (ch > 64 && ch < 91) ||
            ch === 95 ||
            (ch > 47 && ch < 58) ||
            ch > 126
        ) {
            if (state === tokenizer.STATE_ID) {
                token += char;
            } else {
                flush(tokenizer.STATE_ID, char);
            }
            continue;
        }

        // BUGFIX: the original tested `token === '/'`, but '/' characters are
        // emitted immediately as STATE_OTHER and never stored in `token`, so
        // comments were never recognized. Test the current character instead.
        if (char === '/') {
            const next = str[i + 1];
            if (next === '/') {
                // Line comment, up to and including the line terminator.
                flush(tokenizer.STATE_COM, char);
                while (i + 1 < str.length) {
                    token += str[++i];
                    if (str[i] === '\r' || str[i] === '\n') break;
                }
                flush();
                continue;
            }
            if (next === '*') {
                // Block comment, up to and including the closing "*/".
                // BUGFIX: look ahead before consuming the '/' so a lone '*'
                // inside the comment cannot swallow the following character.
                flush(tokenizer.STATE_COM, char);
                while (i + 1 < str.length) {
                    token += str[++i];
                    if (str[i] === '*' && str[i + 1] === '/') {
                        token += str[++i];
                        break;
                    }
                }
                flush();
                continue;
            }
        }

        // BUGFIX: same as above — test `char`, not `token`, for directives.
        if (char === '#') {
            flush(tokenizer.STATE_DIR, char);
            while (i + 1 < str.length) {
                token += str[++i];
                if (str[i] === '\r' || str[i] === '\n') break;
            }
            flush();
            continue;
        }

        // Any other single character is its own STATE_OTHER token.
        if (token) {
            flush(tokenizer.STATE_OTHER, null);
        }
        tokens.push([tokenizer.STATE_OTHER, char]);
    }
    flush();
    return tokens;
};

tokenizer.STATE_WS = "whitespace";
tokenizer.STATE_ID = "identifier";
tokenizer.STATE_COM = "comment";
tokenizer.STATE_TXT = "string";
tokenizer.STATE_OTHER = "any";
tokenizer.STATE_DIR = "directive";

// Guarded so the file also loads in an ES-module context; under CommonJS
// `module` is always defined and this behaves exactly as before.
if (typeof module !== 'undefined' && module.exports) {
    module.exports = tokenizer;
}

poc/grammar/lexer.js

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
*/
44
const lexer = require('jison-gho').lexParser;
55
const fs = require('fs');
6+
const tokenize = require('./ctok');
67
module.exports = function(filename, destination) {
78
fs.readFile(filename, function(err, contents) {
89
contents = contents.toString();
@@ -17,12 +18,37 @@ module.exports = function(filename, destination) {
1718
// handle script blocks
1819
contents = contents.replace(/(\<[^\>]+[^\n]+)\s+{\n*/g, '$1 %{\n');
1920
contents = contents.replace(/\n+\}\n/g, '\n%}\n');
21+
2022
// locate macros
2123
let macro = contents.indexOf('\n<');
22-
contents = contents.substring(0, macro - 1) + '\n%%\n' + contents.substring(macro + 1);
24+
contents = contents.substring(0, macro) + '\n%options case-insensitive\n\n%%\n\n' + contents.substring(macro + 1);
25+
2326
// migrate each macro
27+
let lexerTokens = [];
2428
contents = contents.replace(/^(\<.*?\>)([^\n]+)\s+\%\{(.*?)\%\}/gms, function(text, state, tag, script) {
25-
return state + tag + '\treturn null /* @todo ' + tag + '*/ ;';
29+
console.log(script);
30+
let src = '';
31+
const tokens = tokenize(script);
32+
for(let i = 0; i < tokens.length; i++) {
33+
let token = tokens[i];
34+
switch(token[1]) {
35+
case 'RETURN_TOKEN':
36+
let tok = tokens[i + 2][1];
37+
src += 'return ' + tok + ';';
38+
if (lexerTokens.indexOf(tok) === -1) {
39+
lexerTokens.push(tok);
40+
}
41+
i += 4;
42+
break;
43+
case 'goto':
44+
// ignore goto
45+
i += 3;
46+
break;
47+
default:
48+
src += token[1];
49+
}
50+
}
51+
return state + tag + '\t{ \n ' + src + '\n}';
2652
});
2753
try {
2854
const ast = lexer.parse(contents);

poc/zend/php7/lexer.l

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2005,7 +2005,7 @@ string:
20052005
RETURN_TOKEN(T_NS_C);
20062006
}
20072007

2008-
<SHEBANG>"#!" .* {NEWLINE} {
2008+
<SHEBANG>"#!".*{NEWLINE} {
20092009
CG(zend_lineno)++;
20102010
BEGIN(INITIAL);
20112011
goto restart;

0 commit comments

Comments
 (0)