camkes/parser/stage0.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
# Copyright 2017, Data61
# Commonwealth Scientific and Industrial Research Organisation (CSIRO)
# ABN 41 687 119 230.
#
# This software may be distributed and modified according to the terms of
# the BSD 2-Clause license. Note that NO WARRANTY is provided.
# See "LICENSE_BSD2.txt" for details.
#
# @TAG(DATA61_BSD)
#

'''
Stage 0 parsers. These parsers are various alternatives for the first step in
the CAmkES parsing pipeline. A stage 0 parser makes the following
transformation:

    string/file → augmented_input
'''

from __future__ import absolute_import, division, print_function, \
    unicode_literals
from camkes.internal.seven import cmp, filter, map, zip

from .base import Parser
from .exception import ParseError
import codecs
import os
import re
import shutil
import subprocess


class CPP(Parser):
    '''
    An alternative to opening and reading a file that calls the C
    pre-processor.

    The pre-processed text and the dependency rule emitted by the
    pre-processor are written into a 'camkes-tool' directory underneath the
    working directory that is current when the instance is constructed.
    '''

    def __init__(self, cpp_bin='cpp', flags=None):
        '''
        Create a pre-processing parser.

        cpp_bin: the pre-processor executable to invoke.
        flags: extra command line arguments to pass on every invocation;
            anything falsy is treated as no extra arguments.
        '''
        self.cpp_bin = cpp_bin
        self.flags = flags or []
        self.out_dir = os.path.join(os.getcwd(), 'camkes-tool')
        # Attempt the creation and tolerate the directory already existing,
        # rather than checking first and creating second. The latter leaves
        # a window in which a concurrently running instance can create the
        # directory and make our own mkdir fail spuriously.
        try:
            os.mkdir(self.out_dir)
        except OSError:
            if not os.path.isdir(self.out_dir):
                raise

    def _preprocess(self, output_basename, inputs, stdin_data=None):
        '''
        Run the pre-processor and collect its results.

        output_basename: path prefix for the generated files; the
            pre-processed text goes to this path plus '.cpp' and the
            dependency rule to this path plus '.d'.
        inputs: list of trailing command line arguments naming the input
            (empty to have the input supplied via stdin_data).
        stdin_data: text to feed to the pre-processor's standard input, or
            None to leave standard input alone.

        Returns a tuple of the pre-processed text and the set of
        dependencies listed in the generated rule. Raises ParseError if the
        pre-processor exits with a non-zero status.
        '''
        output = output_basename + '.cpp'
        deps = output_basename + '.d'
        # Only attach a pipe to stdin when there is something to send.
        stdin = subprocess.PIPE if stdin_data is not None else None
        # Run cpp with -MD to generate dependencies because we want to
        # track what files it read.
        p = subprocess.Popen([self.cpp_bin, '-MD', '-MF', deps, '-o',
                              output] + self.flags + inputs, stdin=stdin,
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                             universal_newlines=True)
        _, stderr = p.communicate(stdin_data)
        if p.returncode != 0:
            raise ParseError('CPP failed: %s' % stderr)
        with codecs.open(output, 'r', 'utf-8') as f:
            processed = f.read()
        with codecs.open(deps, 'r', 'utf-8') as f:
            read = set(parse_makefile_rule(f))
        return processed, read

    def parse_file(self, filename):
        '''
        Pre-process the file at the given path.

        Returns a tuple of the pre-processed text and the set of files
        read, which always includes the given file itself. Raises
        ParseError if the pre-processor fails.
        '''
        output_basename = os.path.join(self.out_dir, os.path.basename(filename))
        processed, read = self._preprocess(output_basename, [filename])
        return processed, set([filename]) | read

    def parse_string(self, string):
        '''
        Pre-process the given text, supplied to the pre-processor on its
        standard input.

        Returns a tuple of the pre-processed text and the set of
        dependencies the pre-processor reported. Raises ParseError if the
        pre-processor fails.
        '''
        output_basename = os.path.join(self.out_dir, 'output.camkes')
        # hack around python2 and 3's awful unicode problems
        try:
            string = str(string)
        except UnicodeEncodeError:
            # str will fail on python2 as it is ascii only.
            # however the below fails on python3. So here we are.
            string = string.encode('utf-8')
        return self._preprocess(output_basename, [], string)


class Reader(Parser):
    '''
    The simplest stage 0 "parser": no transformation is applied, the input
    is handed back verbatim together with the set of files that were read.
    '''

    def parse_file(self, filename):
        '''
        Read the named file as UTF-8 and return its contents along with a
        set containing just that file name.
        '''
        with codecs.open(filename, 'r', 'utf-8') as source:
            contents = source.read()
        return contents, {filename}

    def parse_string(self, string):
        '''
        Return the given string untouched; no files are read, so the
        accompanying set is empty.
        '''
        files_read = set()
        return string, files_read


def parse_makefile_rule(f):
    '''
    Parse a dependency rule generated by the C pre-processor and return the
    dependencies of the rule.

    f is an iterable of lines (e.g. an open file). This is a generator that
    yields each dependency name in the order it appears. Only the first
    rule is read: parsing stops at the first non-blank line that does not
    end in a continuation backslash. Blank lines are skipped.

    Whitespace escaped with a backslash (the way a Make-style rule spells a
    path containing spaces) is kept as part of the dependency name, with
    the escaping backslash removed. Other Make escapes are left untouched.

    Raises ParseError if the first non-blank line contains no ':' and so
    cannot be the start of a rule.
    '''
    in_deps = False
    for line in f:
        line = line.strip()
        if line == '':
            continue
        if not in_deps:
            # The first non-blank line is 'target: deps...'; drop the target.
            head = re.match(r'.*?:(.*)$', line)
            if head is None:
                raise ParseError('unexpected dependency line %s found' % line)
            in_deps = True
            line = head.group(1)
        # Split on unescaped whitespace only, so that a name such as
        # 'my\ file.h' stays in one piece instead of becoming two bogus
        # dependencies.
        for dep in re.split(r'(?<!\\)\s+', line.strip()):
            # Skip empty fragments and the bare line-continuation marker.
            if dep == '' or dep == '\\':
                continue
            yield re.sub(r'\\(\s)', r'\1', dep)
        if not line.endswith('\\'):
            # No continuation, so the rule is complete.
            return