Source code for odk_to_spss_syntax.variable_metadata

'''
Created on Jun 13, 2014

.. moduleauthor:: Esmail Fadae <efadae@hotmail.com>
'''

from collections import namedtuple
import json
import re

class VariableMetadata(namedtuple('_VariableMetadata', 'name, label, value_mappings')):
    '''
    A :py:class:`VariableMetadata` object contains the metadata about an
    individual form variable. The class inherits from a semi-anonymous
    :py:func:`namedtuple` (:py:class:`_VariableMetadata`) that handles object
    construction and the creation of immutable attribute accessors.

    :param str name: The encoded name of the variable (e.g. "a01")
    :param str label: The variable's readable label (e.g. "What is your
        sex?"), or ``None`` when the form does not specify one.
    :param dict value_mappings: A dictionary that maps encoded value names
        (e.g. "0", "1") to value labels (e.g. "Female", "Male"), or ``None``
        when the variable has no value labels.
    '''

    @staticmethod
    def _to_native_str(text):
        '''
        Coerce text to the interpreter's native :py:class:`str` type.

        Values that are already :py:class:`str` are returned unchanged;
        anything else (e.g. Python 2 ``unicode``) is UTF-8 encoded. This keeps
        every stored string concatenable with the plain string literals used
        when the syntax lines are assembled.

        :param text: The text to coerce.
        :returns: The text as a native string.
        :rtype: str
        '''
        if isinstance(text, str):
            return text
        return text.encode('utf-8')

    def _to_spss_syntax(self):
        '''
        Output the syntax file lines that correspond to this object.

        :returns: SPSS-syntax-file-formatted strings for use in a syntax
            file's "VARIABLE LABELS" and (possibly) "VALUE LABELS" sections.
            The second element is ``None`` when there are no value mappings.
        :rtype: tuple(str, str)
        '''
        # TODO: Should labels be truncated to 116 characters?

        # Variable labels aren't always specified; fall back to the name.
        label = self.name if self.label is None else self.label
        variable_label_line = '/' + self.name + ' "' + label + '"'

        # There aren't always value labels to report.
        if self.value_mappings is None:
            return variable_label_line, None

        # sorted() works on any mapping's keys, whether keys() returns a list
        # or a view, and yields the values in a deterministic order.
        value_label_parts = ['/' + self.name]
        for value_name in sorted(self.value_mappings):
            value_label = self.value_mappings[value_name]
            value_label_parts.append(value_name + ' "' + value_label + '"')

        return variable_label_line, ' '.join(value_label_parts)

    @classmethod
    def export_spss_syntax(cls, variable_metadata_list):
        '''
        Export the supplied :py:class:`VariableMetadata` objects to a string
        for use in an SPSS syntax file.

        :param variable_metadata_list: The metadata to export.
        :type variable_metadata_list: list(:py:class:`VariableMetadata`)
        :returns: An SPSS-syntax-file-formatted string, or an empty string
            when no metadata is supplied.
        :rtype: :py:class:`String`
        '''
        if not variable_metadata_list:
            return ''

        variable_label_lines = []
        value_label_lines = []
        for var_metadata in variable_metadata_list:
            var_label_line, val_label_line = var_metadata._to_spss_syntax()
            if var_label_line is not None:
                variable_label_lines.append(var_label_line)
            if val_label_line is not None:
                value_label_lines.append(val_label_line)

        # Remove the prepending "/" from the first variable label line. Only
        # the leading character is dropped so that a "/" occurring inside the
        # label itself does not truncate the line.
        first_line = variable_label_lines[0]
        if first_line.startswith('/'):
            variable_label_lines[0] = first_line[1:]

        output_lines = ['VARIABLE LABELS'] + variable_label_lines

        # There aren't always value labels to report.
        if value_label_lines:
            output_lines.append('')  # Blank line separating the two sections.
            output_lines.append('VALUE LABELS')
            output_lines.extend(value_label_lines)

        return '\n'.join(output_lines) + '\n'

    @classmethod
    def import_json(cls, odk_json_text):
        '''
        Parse question metadata (e.g. names, labels, value mappings) from the
        supplied JSON-formatted ODK form text.

        :param str odk_json_text: The JSON-formatted text of the form being
            imported.
        :returns: :py:class:`VariableMetadata` objects that correspond to the
            JSON form's questions.
        :rtype: list(:py:class:`VariableMetadata`)
        '''
        form_dict = json.loads(odk_json_text)
        return cls._import(form_dict)

    @classmethod
    def _import(cls, odk_form_dict):
        '''
        Where the actual importing work occurs. Takes an ODK form pre-parsed
        into :py:class:`dict` and returns the appropriate
        :py:class:`VariableMetadata` objects.

        :param dict odk_form_dict: The ODK form (or a group within it) parsed
            into a :py:class:`dict`; its 'children' entry lists the variables.
        :returns: :py:class:`VariableMetadata` objects that correspond to the
            form's questions.
        :rtype: list(:py:class:`VariableMetadata`)
        '''
        variable_metadata_list = []

        for form_var in odk_form_dict['children']:
            var_type = form_var['type']

            if var_type == 'group':
                # Recursively import groups.
                variable_metadata_list.extend(cls._import(form_var))
                continue

            var_name = cls._to_native_str(form_var['name'])

            # Labels are optional.
            raw_label = form_var.get('label')
            var_label = None if raw_label is None else cls._to_native_str(raw_label)

            # TODO: Find out multi-select "type" (e.g. "select multiple")
            if var_type in ['select one']:
                value_mappings = {}
                for mapping in form_var['children']:
                    val_name = cls._to_native_str(mapping['name'])
                    value_mappings[val_name] = cls._to_native_str(mapping['label'])
            else:
                value_mappings = None

            variable_metadata_list.append(cls(var_name, var_label, value_mappings))

            # TODO: Not really knowing the "calculate" syntax, this is likely
            # very brittle.
            if var_type == 'calculate':
                calculation_string = form_var['bind']['calculate']
                # Look for a substring of the form "'matched substring:" and
                # capture the text between the quote and the colon.
                match = re.match(r'''^.+'(.+):''', calculation_string)
                # Expressions without that pattern yield no derived variable
                # rather than aborting the whole import.
                if match is not None:
                    calculated_var_name = cls._to_native_str(match.group(1))
                    variable_metadata_list.append(
                        cls(calculated_var_name, calculated_var_name, None))

        return variable_metadata_list