Tiamat-Tech
diff --git a/‎poc/README.md
Lines changed: 12 additions & 5 deletions b/‎poc/README.md
Lines changed: 12 additions & 5 deletions
diff --git a/‎poc/bin/build.js
Lines changed: 46 additions & 0 deletions b/‎poc/bin/build.js
Lines changed: 46 additions & 0 deletions
diff --git a/‎poc/grammar/lexer.js
Lines changed: 34 additions & 0 deletions b/‎poc/grammar/lexer.js
Lines changed: 34 additions & 0 deletions
@@ -2,10 +2,10 @@ The idea is to batch the update of the parser by syncing with php source.
 
 # Goals / Requisites :
 
-- Avoid updating by hand lexer or grammar
-- Avoid forking PHP files (just keep them in sync)
-- Having exactly the same behavior (precedence, expression parsing, error messages)
-- Have a decent parsing speed
+1. Avoid updating by hand lexer or grammar
+2. Avoid forking PHP files (just keep them in sync)
+3. Have exactly the same behavior (precedence, expression parsing, error messages)
+4. Have a decent parsing speed
 
 ## PHP Parser from nikic 
 
@@ -17,8 +17,15 @@ It will fail as it depends on php tokenizer launched from the php runtime, and i
 
 Second approach (in progress) is to :
 
-- [ ] Download grammar from PHP src
+- [x] Download grammar from PHP src
 - [ ] Automate C specific code
 - [ ] Inject javascript code
 - [ ] Generate lexer & grammar files
 - [ ] Generate files with JISON
+
+Lexer Problems :
+1. Stripping unsupported bison tags - should be fine, but needs maintenance on future upgrades (breaks requisite 1)
+2. Stripping C code - can be done, but demands reimplementation (breaks requisite 3)
+
+Grammar Problems :
+@todo
@@ -0,0 +1,46 @@
+#!/usr/bin/node
+
+
+const http = require('https');
+const fs = require('fs');
+const lexer = require('../grammar/lexer');
+
+// 1. Downloading lexer files from PHP
+const files = {
+    "php7": {
+        "lexer.l": "https://raw.githubusercontent.com/php/php-src/master/Zend/zend_language_scanner.l",
+        "grammar.y": "https://raw.githubusercontent.com/php/php-src/master/Zend/zend_language_parser.y"
+    },
+    "php5": {
+        "lexer.l": "https://raw.githubusercontent.com/php/php-src/PHP-5.6.40/Zend/zend_language_scanner.l",
+        "grammar.y": "https://raw.githubusercontent.com/php/php-src/PHP-5.6.40/Zend/zend_language_parser.y"
+    }
+};
+const target = __dirname + '/../zend';
+const dl = [];
+for(let ver in files) {
+    for(let type in files[ver]) {
+        dl.push(new Promise(function(version, filename, url, done, reject) {
+            return done(true); // disable the download
+            console.log('Downloading ' + filename + ' for ' + version);
+            const file = fs.createWriteStream(target + '/' + version + '/' + filename);
+            const request = http.get(url, function(response) {
+              response.pipe(file);
+              response.on('end', function() {
+                  done(true);
+              });
+              response.on('error', function(e) {
+                  reject(e);
+              });
+            });
+        }.bind(this, ver, type, files[ver][type])));
+    }
+}
+Promise.all(dl).then(function() {
+    console.log('Download is ready');
+    // upgrading the lexer
+    lexer(target + '/php7/lexer.l', null);
+}).catch(function(e) {
+    console.error(e);
+    process.exit(1);
+});
@@ -0,0 +1,34 @@
+/**
+ * This module opens a lexer file and extracts its parts
+ */
+const lexer = require('jison-gho').lexParser;
+const fs = require('fs');
+module.exports = function(filename, destination) {
+    fs.readFile(filename, function(err, contents) { 
+        contents = contents.toString();
+        // remove header / footer
+        let header = contents.indexOf('/*!re2c');
+        let footer = contents.indexOf('\n*/\n', header);
+        contents = contents.substring(header + 7, footer);
+        // remove macro
+        contents = contents.replace('re2c:yyfill:check = 0;', '');
+        // remove trigger on states
+        contents = contents.replace(/\<\!\*\>[^;]+;\n*/g, '');
+        // handle script blocks
+        contents = contents.replace(/(\<[^\>]+[^\n]+)\s+{\n*/g, '$1 %{\n');
+        contents = contents.replace(/\n+\}\n/g, '\n%}\n');
+        // locate macros
+        let macro = contents.indexOf('\n<');
+        contents = contents.substring(0, macro - 1) + '\n%%\n' + contents.substring(macro + 1);
+        // migrate each macro
+        contents = contents.replace(/^(\<.*?\>)([^\n]+)\s+\%\{(.*?)\%\}/gms, function(text, state, tag, script) {
+            return state + tag + '\treturn null /* @todo ' + tag + '*/ ;';
+        });
+        try {
+            const ast = lexer.parse(contents);
+            console.log(ast);    
+        } catch(e) {
+            console.error(e.message);
+        }
+    });
+};