Source code for ybe.lib.qti_reader

__author__ = 'Robbert Harms'
__date__ = '2020-04-18'
__maintainer__ = 'Robbert Harms'
__email__ = 'robbert@xkls.nl'
__licence__ = 'GPL v3'

import io
import os
import zipfile
from pathlib import Path

from lxml import etree
from bs4 import BeautifulSoup
from datetime import datetime

from ybe.lib.ybe_contents import YbeExam, YbeInfo, MultipleChoice, MultipleResponse, OpenQuestion, TextHTML, Text, \
    MultipleChoiceAnswer, MultipleResponseAnswer, ZipArchiveContext, DirectoryContext, TextOnlyQuestion


def read_qti_zip(zip_file):
    """Parse the data from the provided QTI zip file and return an :class:`ybe.lib.ybe_contents.YbeExam` object.

    Since there are some differences in the data stored by the QTI format and the Ybe format, round-trip
    conversion may not be lossless.

    Args:
        zip_file (str, Path, bytes, bytearray or zipfile.ZipFile): the QTI data to load. A ``str`` or ``Path``
            is treated as the filename of a zip file, ``bytes``/``bytearray`` as the in-memory content of a
            zip file. Any other object is used as-is and must offer the ``filelist``, ``namelist()`` and
            ``read()`` members of :class:`zipfile.ZipFile`.

    Returns:
        ybe.lib.ybe_contents.YbeExam: an .ybe exam loaded with the content from the QTI zip file. Its
            resource context is a ``ZipArchiveContext`` built from the absolute path of the zip file, or from
            ``None`` when the data did not come from a filename.

    Raises:
        ValueError: if the archive holds no files, or if it has no ``imsmanifest.xml``.
    """
    path = None
    # Only archives opened here are closed here; an archive handed in by the caller stays open.
    opened_here = True
    if isinstance(zip_file, (Path, str)):
        path = os.path.abspath(zip_file)
        archive = zipfile.ZipFile(zip_file, 'r')
    elif isinstance(zip_file, (bytes, bytearray)):
        archive = zipfile.ZipFile(io.BytesIO(zip_file), 'r')
    else:
        archive = zip_file
        opened_here = False

    try:
        if not archive.filelist:
            raise ValueError('The zip file is empty.')

        if 'imsmanifest.xml' not in archive.namelist():
            raise ValueError('No imsmanifest.xml found in zip file.')

        def load_func(filename):
            return archive.read(filename)

        # All file reads happen inside this call, so the archive can be closed right after it.
        ybe_exam = _load_qti_manifest(load_func)
    finally:
        if opened_here:
            archive.close()

    ybe_exam.resource_context = ZipArchiveContext(path)
    return ybe_exam
def read_qti_dir(dir_name):
    """Load an exam from a directory holding the extracted content of a QTI zip file.

    The QTI and Ybe formats do not store exactly the same data, so converting back and forth may lose
    information.

    Args:
        dir_name (str): the path to the directory with QTI data to load

    Returns:
        ybe.lib.ybe_contents.YbeExam: an .ybe exam loaded with the content of the QTI directory. Its resource
            context is a ``DirectoryContext`` on the absolute path of ``dir_name``.

    Raises:
        ValueError: if ``dir_name`` is not an existing directory.
    """
    is_directory = os.path.isdir(dir_name)
    if not is_directory:
        raise ValueError(f'The provided path "{dir_name}" is not a directory.')

    def read_member(member_name):
        # Members are addressed relative to the QTI directory and returned as raw bytes.
        member_path = os.path.join(dir_name, member_name)
        with open(member_path, 'rb') as handle:
            content = handle.read()
        return content

    exam = _load_qti_manifest(read_member)
    exam.resource_context = DirectoryContext(os.path.abspath(dir_name))
    return exam
# Prefixes marking an image source as a file shipped inside the QTI package, in URL-encoded and plain form.
_IMS_FILEBASE_PREFIXES = ('%24IMS-CC-FILEBASE%24/', '$IMS-CC-FILEBASE$/')


def _load_qti_manifest(file_load_func):
    """Load the QTI data from a file source.

    Reads ``imsmanifest.xml``, collects the resources it lists, and from those loads the assessment meta data
    and the questions.

    Args:
        file_load_func (Callable[str, bytes]): callable, which, given a filename returns the (binary) content
            of that file.

    Returns:
        ybe.lib.ybe_contents.YbeExam: loaded from the QTI data.

    Raises:
        ValueError: if the manifest lists no questions resource or no assessment meta resource.
    """
    ims_manifest = etree.fromstring(file_load_func('imsmanifest.xml'))

    # The default (prefix-less) namespace is dropped before the map is handed to ``xpath``.
    ims_manifest_nsmap = ims_manifest.nsmap
    ims_manifest_nsmap.pop(None, None)

    ims_datetime = ims_manifest.xpath('.//imsmd:dateTime', namespaces=ims_manifest_nsmap)[0].text

    resources_node = ims_manifest.xpath("//*[local-name() = 'resources']")[0]

    resources = []
    for resource_node in resources_node:
        resource_info = {
            'type': resource_node.get('type'),
            'identifier': resource_node.get('identifier'),
            'href': resource_node.get('href', '')
        }
        # Tags are matched on their suffix so that a namespace prefix in the tag name does not matter.
        for item in resource_node:
            if item.tag.endswith('file'):
                resource_info['file'] = item.get('href')
            if item.tag.endswith('dependency'):
                resource_info['dependency'] = item.get('identifierref')
        resources.append(resource_info)

    questions_resource = _find_resource(
        resources, lambda el: el['type'] == 'imsqti_xmlv1p2', 'of type "imsqti_xmlv1p2"')
    meta_resource = _find_resource(
        resources, lambda el: el['href'].endswith('assessment_meta.xml'), 'for "assessment_meta.xml"')

    meta_data = _load_assessment_meta(etree.fromstring(file_load_func(meta_resource['file'])))
    questions = _load_qti_questions(etree.fromstring(file_load_func(questions_resource['file'])))

    return YbeExam(questions=questions,
                   info=YbeInfo(title=meta_data['title'], date=_parse_manifest_date(ims_datetime)))


def _find_resource(resources, predicate, description):
    """Return the first resource for which the predicate holds.

    Args:
        resources (List[dict]): the resource dictionaries collected from the manifest
        predicate (Callable[dict, bool]): test applied to each resource in order
        description (str): description of the wanted resource, used in the error message

    Returns:
        dict: the first matching resource

    Raises:
        ValueError: if none of the resources matches
    """
    for resource in resources:
        if predicate(resource):
            return resource
    raise ValueError(f'No resource {description} found in imsmanifest.xml.')


def _parse_manifest_date(date_text):
    """Convert the manifest date string to a date object.

    Args:
        date_text (str): a date formatted as ``YYYY-MM-DD``

    Returns:
        datetime.date: the date stated in the manifest
    """
    return datetime.strptime(date_text, '%Y-%m-%d').date()


def _load_assessment_meta(xml):
    """Parse the assessment meta file and return the title and description.

    The title is read from the first child of the root node, the description from the second child.

    Args:
        xml (etree): the root node of the parsed ``assessment_meta.xml`` file

    Returns:
        dict: information parsed from the assessment_meta.xml file, with keys ``title`` and ``description``
    """
    return {'title': xml[0].text,
            'description': xml[1].text}


def _load_qti_questions(xml):
    """Load questions from a QTI questions file.

    Questions of a type for which no loader is available are skipped.

    Args:
        xml (etree): the questions file loaded as an etree.

    Returns:
        List [ybe.lib.ybe_contents.Question]: the questions from the provided XML tree
    """
    # Positional access: the question items are the children of the second child of the first child of the
    # root. NOTE(review): presumably ``assessment/section`` -- confirm against a sample file.
    question_nodes = list(xml[0][1])

    question_types = {
        'multiple_choice_question': _load_multiple_choice,
        'multiple_answers_question': _load_multiple_response,
        'essay_question': _load_open_question,
        'text_only_question': _load_text_only_question
    }

    ybe_questions = []
    for question_node in question_nodes:
        meta_data = _qtimetadata_to_dict(question_node[0][0])
        loader = question_types.get(meta_data['question_type'])
        if loader is not None:
            ybe_questions.append(loader(question_node))
    return ybe_questions


def _qtimetadata_to_dict(qtimetadata):
    """Load a ``qtimetadata`` node as a dictionary.

    Given an XML tree with as root ``qtimetadata``, this converts all the ``qtimetadatafield`` to key value
    pairs, with as key the content of ``fieldlabel`` and as value the content of ``fieldentry``.

    Args:
        qtimetadata (etree): an XML tree starting at the ``qtimetadata`` node

    Returns:
        dict: mapping ``fieldlabel`` to ``fieldentry``. A field missing its label or its entry contributes
            ``None`` for the missing part.
    """
    result = {}
    for datafield in qtimetadata:
        label = None
        value = None
        for element in datafield:
            if element.tag.endswith('fieldlabel'):
                label = element.text
            if element.tag.endswith('fieldentry'):
                value = element.text
        result[label] = value
    return result


def _load_multiple_choice(question_node):
    """Load a multiple choice question from the given XML tree.

    Args:
        question_node (etree): a question item node

    Returns:
        ybe.lib.ybe_contents.MultipleChoice: multiple choice question
    """
    meta_data = _qtimetadata_to_dict(question_node[0][0])
    text = _load_text(question_node[1][0])

    # The correct answer is taken from the response condition that contains a ``setvar`` element: the text
    # of the first grandchild of that condition is the identifier of the correct response label.
    correct_answer = None
    for resprocessing_node in question_node[2]:
        if resprocessing_node.tag.endswith('respcondition'):
            for item in resprocessing_node:
                if item.tag.endswith('setvar'):
                    correct_answer = resprocessing_node[0][0].text

    answers = [MultipleChoiceAnswer(text=_load_text(response_label[0]),
                                    correct=(response_label.get('ident') == correct_answer))
               for response_label in question_node[1][1][0]]

    return MultipleChoice(id=question_node.get('ident'), text=text, answers=answers,
                          points=float(meta_data['points_possible']))


def _load_multiple_response(question_node):
    """Load a multiple response question from the given XML tree.

    Args:
        question_node (etree): a question item node

    Returns:
        ybe.lib.ybe_contents.MultipleResponse: multiple response question
    """
    meta_data = _qtimetadata_to_dict(question_node[0][0])
    text = _load_text(question_node[1][0])

    # Positional access to the node whose ``varequal`` children list the identifiers of the correct answers.
    and_node = question_node[2][1][0][0]
    correct_answers = [condition_node.text for condition_node in and_node
                       if condition_node.tag.endswith('varequal')]

    answers = [MultipleResponseAnswer(text=_load_text(response_label[0]),
                                      correct=(response_label.get('ident') in correct_answers))
               for response_label in question_node[1][1][0]]

    return MultipleResponse(id=question_node.get('ident'), text=text, answers=answers,
                            points=float(meta_data['points_possible']))


def _load_open_question(question_node):
    """Load an open question from the given XML tree.

    Args:
        question_node (etree): a question item node

    Returns:
        ybe.lib.ybe_contents.OpenQuestion: loaded question
    """
    meta_data = _qtimetadata_to_dict(question_node[0][0])
    text = _load_text(question_node[1][0])
    return OpenQuestion(id=question_node.get('ident'), text=text, points=float(meta_data['points_possible']))


def _load_text_only_question(question_node):
    """Load a text only question from the given XML tree.

    Args:
        question_node (etree): a question item node

    Returns:
        ybe.lib.ybe_contents.TextOnlyQuestion: loaded question
    """
    meta_data = _qtimetadata_to_dict(question_node[0][0])
    text = _load_text(question_node[1][0])
    return TextOnlyQuestion(id=question_node.get('ident'), text=text,
                            points=float(meta_data['points_possible']))


def _strip_ims_filebase(src):
    """Turn the source of a packaged image into a plain relative path.

    Removes the leading IMS file base marker (at most one) and any query string.

    Args:
        src (str): the ``src`` attribute of an image

    Returns:
        str: the source without file base marker and without query string
    """
    for prefix in _IMS_FILEBASE_PREFIXES:
        if src.startswith(prefix):
            src = src[len(prefix):]
            break
    # ``partition`` leaves a source without a query string untouched.
    return src.partition('?')[0]


def _load_text(material_node):
    """Load the text from a node marked ``material``.

    For HTML content, the sources of images shipped inside the QTI package are rewritten to plain relative
    paths and equation images are replaced by inline math spans.

    Args:
        material_node (etree): a ``material`` node, with the ``mattext`` element as first child

    Returns:
        ybe.lib.ybe_contents.TextNode: a text node subclass

    Raises:
        ValueError: if the text type is neither ``text/html`` nor ``text/plain``.
    """
    mattext = material_node[0]
    texttype = mattext.get('texttype')

    if texttype == 'text/html':
        parsed_html = BeautifulSoup(mattext.text, 'lxml')

        def only_local(src):
            # The filter also receives ``None``, for images without a ``src`` attribute.
            return isinstance(src, str) and src.startswith(_IMS_FILEBASE_PREFIXES)

        for img in parsed_html.find_all('img', src=only_local):
            img['src'] = _strip_ims_filebase(img.get('src'))

        def equations(class_):
            if not class_:
                return False
            return 'equation_image' in class_

        for img in parsed_html.find_all('img', class_=equations):
            equation = img['data-equation-content']
            eq_span = parsed_html.new_tag('span', attrs={'class': 'math inline'})
            eq_span.string = f'\\({equation}\\)'
            img.replace_with(eq_span)

        # The lxml parser wraps the fragment in <html><body>; only the original fragment is kept.
        html_without_html_and_body_tags = "".join([str(x) for x in parsed_html.body.children])
        text = TextHTML(html_without_html_and_body_tags)
    elif texttype == 'text/plain':
        text = Text(mattext.text)
    else:
        raise ValueError('No suitable text type found.')

    return text