Skip to content

API Reference

Working with Projects (RefiProject)

Handler for REFI-QDA Project (.qdpx) files.

A .qdpx file is a zipped archive containing a primary XML project file and optional external sources (like PDFs or media).

Source code in pyrefiqda/refiproject.py
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
class RefiProject:
    """Handler for REFI-QDA Project (.qdpx) files.

    A `.qdpx` file is a zipped archive containing a primary XML project file
    (`.qde`) and optional external sources (like PDFs or media) kept in a
    `sources/` folder. Every helper is a static method; the class holds no state.
    """

    @staticmethod
    def load(file_path: str | Path, extract_dir: str | Path = "./extracted_qdpx") -> Project:
        """Unzips a .qdpx file and parses the internal XML into Pydantic models.

        Args:
            file_path: The path to the existing `.qdpx` project file.
            extract_dir: The directory where the zipped archive will be extracted.
                Defaults to "./extracted_qdpx".

        Returns:
            Project: The Project model parsed from the archive's `.qde` file.

        Raises:
            FileNotFoundError: If the provided `.qdpx` file does not exist, or if
                no `.qde` XML file is found in the extraction directory.

        Example:
            ```python
            from pyrefiqda import RefiProject
            project = RefiProject.load("study.qdpx", extract_dir="./temp_extract")
            ```
        """
        file_path = Path(file_path)
        extract_path = Path(extract_dir)

        # 1. Unzip the .qdpx file and remember which members it contained
        with zipfile.ZipFile(file_path, "r") as zip_ref:
            zip_ref.extractall(extract_path)
            member_names = set(zip_ref.namelist())

        # 2. Find the .qde XML file (sorted so the choice is deterministic)
        candidates = sorted(extract_path.glob("*.qde"))
        if not candidates:
            raise FileNotFoundError("No .qde file found inside the .qdpx archive.")

        # Prefer a .qde that really came from this archive: a reused extract_dir
        # may still hold a stale .qde left behind by an earlier load.
        from_archive = [path for path in candidates if path.name in member_names]
        qde_file = (from_archive or candidates)[0]

        # 3. Parse the XML into the generated Pydantic models
        parser = XmlParser()
        return parser.parse(str(qde_file), Project)

    @staticmethod
    def save(project: Project, file_path: str | Path, source_media_dir: str | Path | None = None) -> None:
        """Serializes a Project Pydantic model back to XML and packages it into a .qdpx zip archive.

        The XML is stored as `project.qde` at the archive root and every entry
        is written with DEFLATE compression.

        Args:
            project: The populated Pydantic Project model.
            file_path: The destination path where the .qdpx file will be saved.
            source_media_dir: (Optional) The local directory containing the source files
                (e.g., PDFs, images) that need to be packaged into the archive's `sources/`
                folder. The directory is walked recursively and each file is stored as
                `sources/<file name>`; sub-directory names are not kept. A path that is
                not an existing directory is silently ignored.

        Raises:
            OSError: If there is an issue writing to the destination path.

        Example:
            ```python
            from pyrefiqda import RefiProject

            RefiProject.save(my_project, "project.qdpx")
            ```
        """
        file_path = Path(file_path)

        # 1. Serialize the Pydantic model back to an XML string
        config = SerializerConfig(xml_declaration=True, encoding="UTF-8")
        serializer = XmlSerializer(config=config)
        xml_string = serializer.render(project)

        # 2. Package it into a .qdpx (zip) file. ZipFile defaults to ZIP_STORED
        #    (no compression at all), so DEFLATE has to be requested explicitly.
        with zipfile.ZipFile(file_path, "w", compression=zipfile.ZIP_DEFLATED) as zip_ref:
            # Write the XML string directly into the zip archive as a .qde file
            zip_ref.writestr("project.qde", xml_string)

            # Package all source files if a directory was provided
            if source_media_dir:
                source_path = Path(source_media_dir)
                if source_path.is_dir():
                    for root, _, files in os.walk(source_path):
                        for file in files:
                            local_file = Path(root) / file
                            # Everything lands directly inside 'sources/' in the zip
                            arcname = f"sources/{file}"
                            zip_ref.write(local_file, arcname=arcname)

    @staticmethod
    def resolve_source_path(internal_path: str, extract_dir: str | Path) -> Path:
        """Resolves a REFI-QDA internal path to an actual local file path.

        REFI-QDA projects store media and text files inside a `sources/` directory
        and reference them using an `internal://` URI scheme. This helper translates
        that URI into a Python `Path` object below `<extract_dir>/sources/`. The
        path is only computed; the file is not checked for existence.

        Args:
            internal_path: The REFI-QDA internal URI string (e.g., `"internal://<GUID>.txt"`).
            extract_dir: The local directory where the `.qdpx` archive was originally extracted.

        Returns:
            Path: The resolved local filesystem path pointing to the source file.

        Raises:
            ValueError: If the provided `internal_path` does not start with `"internal://"`.

        Example:
            ```python
            from pyrefiqda import RefiProject
            project = RefiProject.load("research.qdpx", extract_dir="./temp_project")

            # Analyze transcripts
            for source in project.sources.text_source:
                # Use helper to find the actual file on the hard drive
                local_path = RefiProject.resolve_source_path(
                    source.plain_text_path,
                    "./temp_project"
                )

                with open(local_path, "r", encoding="utf-8") as f:
                    transcript_text = f.read()

                print(transcript_text)
            ```
        """
        prefix = "internal://"
        if not internal_path.startswith(prefix):
            raise ValueError("Provided path is not a valid REFI-QDA internal path.")

        # Drop only the leading scheme. str.replace would also delete any later
        # "internal://" that happens to be part of the file name itself.
        filename = internal_path[len(prefix):]
        return Path(extract_dir) / "sources" / filename

    @staticmethod
    def import_source_file(local_file_path: str | Path, extract_dir: str | Path, source_guid: str | None = None) -> str:
        """Copies a local file into the project's sources directory and returns its REFI-QDA internal URI.

        The file is copied into `<extract_dir>/sources/` (created on demand) and
        named `<GUID><original extension>`.

        Args:
            local_file_path: The path to the raw media/text file you want to add to the project.
            extract_dir: The temporary working directory for your project.
            source_guid: (Optional) A specific GUID to use. If None (or empty), a new UUID4 is generated.

        Returns:
            str: The properly formatted REFI-QDA internal URI (e.g., `"internal://<GUID>.pdf"`).

        Raises:
            FileNotFoundError: If `local_file_path` does not exist or is not a regular file.

        Example:
            ```python
            from pyrefiqda import RefiProject

            internal_uri = RefiProject.import_source_file(
                "raw_data/interview1.docx",
                "./working_dir"
            )
            print(internal_uri) # Output: internal://b5566006-eb5f-43f0...docx
            ```
        """
        local_file_path = Path(local_file_path)
        extract_dir = Path(extract_dir)

        # is_file() rather than exists(): a directory cannot be copied as a source,
        # so reject it here instead of failing later inside shutil.copy2.
        if not local_file_path.is_file():
            raise FileNotFoundError(f"Source file not found: {local_file_path}")

        # Ensure the sources directory exists
        sources_dir = extract_dir / "sources"
        sources_dir.mkdir(parents=True, exist_ok=True)

        # Generate GUID if not provided
        guid = source_guid or str(uuid.uuid4())

        # GUID as filename, retaining the original extension
        new_filename = f"{guid}{local_file_path.suffix}"

        # Copy the file (with metadata) into the project's source directory
        shutil.copy2(local_file_path, sources_dir / new_filename)

        # Return the internal URI string
        return f"internal://{new_filename}"

import_source_file(local_file_path, extract_dir, source_guid=None) staticmethod

Copies a local file into the project's sources directory and returns its REFI-QDA internal URI.

This method ensures full compliance with the REFI-QDA standard by copying the file into a sources/ subfolder and renaming it to a unique GUID while preserving the original file extension.

Parameters:

Name Type Description Default
local_file_path str | Path

The path to the raw media/text file you want to add to the project.

required
extract_dir str | Path

The temporary working directory for your project.

required
source_guid str | None

(Optional) A specific GUID to use. If None, a new UUID4 is generated.

None

Returns:

Name Type Description
str str

The properly formatted REFI-QDA internal URI (e.g., "internal://<GUID>.pdf").

Raises:

Type Description
FileNotFoundError

If the provided local_file_path does not exist.

Example
from pyrefiqda import RefiProject

internal_uri = RefiProject.import_source_file(
    "raw_data/interview1.docx", 
    "./working_dir"
)
print(internal_uri) # Output: internal://b5566006-eb5f-43f0...docx
Source code in pyrefiqda/refiproject.py
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
@staticmethod
def import_source_file(local_file_path: str | Path, extract_dir: str | Path, source_guid: str | None = None) -> str:
    """Copies a local file into the project's sources directory and returns its REFI-QDA internal URI.

    This method ensures full compliance with the REFI-QDA standard by copying the file 
    into a `sources/` subfolder and renaming it to a unique GUID while preserving 
    the original file extension.

    Args:
        local_file_path: The path to the raw media/text file you want to add to the project.
        extract_dir: The temporary working directory for your project.
        source_guid: (Optional) A specific GUID to use. If None, a new UUID4 is generated.

    Returns:
        str: The properly formatted REFI-QDA internal URI (e.g., `"internal://<GUID>.pdf"`).

    Raises:
        FileNotFoundError: If the provided `local_file_path` does not exist.

    Example:
        ```python
        from pyrefiqda import RefiProject

        internal_uri = RefiProject.import_source_file(
            "raw_data/interview1.docx", 
            "./working_dir"
        )
        print(internal_uri) # Output: internal://b5566006-eb5f-43f0...docx
        ```
    """
    local_file_path = Path(local_file_path)
    extract_dir = Path(extract_dir)

    if not local_file_path.exists():
        raise FileNotFoundError(f"Source file not found: {local_file_path}")

    # Ensure the sources directory exists
    sources_dir = extract_dir / "sources"
    sources_dir.mkdir(parents=True, exist_ok=True)

    # Generate GUID if not provided
    guid = source_guid or str(uuid.uuid4())

    # Standard mandates: GUID as filename, retaining original extension
    ext = local_file_path.suffix
    new_filename = f"{guid}{ext}"

    # Copy the file into the project's source directory
    dest_path = sources_dir / new_filename
    shutil.copy2(local_file_path, dest_path)

    # Return the internal URI string
    return f"internal://{new_filename}"

load(file_path, extract_dir='./extracted_qdpx') staticmethod

Unzips a .qdpx file and parses the internal XML into Pydantic models.

Parameters:

Name Type Description Default
file_path str | Path

The path to the existing .qdpx project file.

required
extract_dir str

The directory where the zipped archive will be extracted. Defaults to "./extracted_qdpx".

'./extracted_qdpx'

Returns:

Name Type Description
Project Project

The strictly-typed Pydantic Project model containing all qualitative data.

Raises:

Type Description
FileNotFoundError

If the provided .qdpx file does not exist, or if no .qde XML file is found inside the extracted archive.

Example
from pyrefiqda import RefiProject
project = RefiProject.load("study.qdpx", extract_dir="./temp_extract")
Source code in pyrefiqda/refiproject.py
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
@staticmethod
def load(file_path: str | Path, extract_dir: str | Path = "./extracted_qdpx") -> Project:
    """Unzips a .qdpx file and parses the internal XML into Pydantic models.

    Args:
        file_path: The path to the existing `.qdpx` project file.
        extract_dir: The directory where the zipped archive will be extracted.
            Defaults to "./extracted_qdpx".

    Returns:
        Project: The Project model parsed from the archive's `.qde` file.

    Raises:
        FileNotFoundError: If the provided `.qdpx` file does not exist, or if
            no `.qde` XML file is found in the extraction directory.

    Example:
        ```python
        from pyrefiqda import RefiProject
        project = RefiProject.load("study.qdpx", extract_dir="./temp_extract")
        ```
    """
    file_path = Path(file_path)
    extract_path = Path(extract_dir)

    # 1. Unzip the .qdpx file and remember which members it contained
    with zipfile.ZipFile(file_path, "r") as zip_ref:
        zip_ref.extractall(extract_path)
        member_names = set(zip_ref.namelist())

    # 2. Find the .qde XML file (sorted so the choice is deterministic)
    candidates = sorted(extract_path.glob("*.qde"))
    if not candidates:
        raise FileNotFoundError("No .qde file found inside the .qdpx archive.")

    # Prefer a .qde that really came from this archive: a reused extract_dir
    # may still hold a stale .qde left behind by an earlier load.
    from_archive = [path for path in candidates if path.name in member_names]
    qde_file = (from_archive or candidates)[0]

    # 3. Parse the XML into the generated Pydantic models
    parser = XmlParser()
    return parser.parse(str(qde_file), Project)

resolve_source_path(internal_path, extract_dir) staticmethod

Resolves a REFI-QDA internal path to an actual local file path.

REFI-QDA projects store media and text files inside a sources/ directory and reference them using an internal:// URI scheme. This helper translates that URI into a usable Python Path object pointing to the extracted file.

Parameters:

Name Type Description Default
internal_path str

The REFI-QDA internal URI string (e.g., "internal://<GUID>.txt").

required
extract_dir str | Path

The local directory where the .qdpx archive was originally extracted.

required

Returns:

Name Type Description
Path Path

The resolved local filesystem path pointing to the source file.

Raises:

Type Description
ValueError

If the provided internal_path does not start with "internal://".

Example
from pyrefiqda import RefiProject
project = RefiProject.load("research.qdpx", extract_dir="./temp_project")

# Analyze transcripts
for source in project.sources.text_source:
    # Use helper to find the actual file on the hard drive
    local_path = RefiProject.resolve_source_path(
        source.plain_text_path, 
        "./temp_project"
    )

    with open(local_path, "r", encoding="utf-8") as f:
        transcript_text = f.read()

    print(transcript_text)
Source code in pyrefiqda/refiproject.py
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
@staticmethod
def resolve_source_path(internal_path: str, extract_dir: str | Path) -> Path:
    """Resolves a REFI-QDA internal path to an actual local file path.

    REFI-QDA projects store media and text files inside a `sources/` directory 
    and reference them using an `internal://` URI scheme. This helper translates 
    that URI into a usable Python `Path` object pointing to the extracted file.

    Args:
        internal_path: The REFI-QDA internal URI string (e.g., `"internal://<GUID>.txt"`).
        extract_dir: The local directory where the `.qdpx` archive was originally extracted.

    Returns:
        Path: The resolved local filesystem path pointing to the source file.

    Raises:
        ValueError: If the provided `internal_path` does not start with `"internal://"`.

    Example: 
        ```python
        from pyrefiqda import RefiProject
        project = RefiProject.load("research.qdpx", extract_dir="./temp_project")

        # Analyze transcripts
        for source in project.sources.text_source:
            # Use helper to find the actual file on the hard drive
            local_path = RefiProject.resolve_source_path(
                source.plain_text_path, 
                "./temp_project"
            )

            with open(local_path, "r", encoding="utf-8") as f:
                transcript_text = f.read()

            print(transcript_text)
        ```
    """
    if not internal_path.startswith("internal://"):
        raise ValueError("Provided path is not a valid REFI-QDA internal path.")

    filename = internal_path.replace("internal://", "")
    return Path(extract_dir) / "sources" / filename

save(project, file_path, source_media_dir=None) staticmethod

Serializes a Project Pydantic model back to XML and packages it into a .qdpx zip archive.

This method handles the standard compression required by the REFI-QDA specification, ensuring it can be opened by NVivo, MAXQDA, etc.

Parameters:

Name Type Description Default
project Project

The populated Pydantic Project model.

required
file_path str | Path

The destination path where the .qdpx file will be saved.

required
source_media_dir str | Path | None

(Optional) The local directory containing the source files (e.g., PDFs, images) that need to be packaged into the archive's sources/ folder.

None

Raises:

Type Description
IOError

If there is an issue writing to the destination path.

Example
from pyrefiqda import RefiProject

RefiProject.save(my_project, "project.qdpx")
Source code in pyrefiqda/refiproject.py
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
@staticmethod
def save(project: Project, file_path: str | Path, source_media_dir: str | Path | None = None) -> None:
    """Serializes a Project Pydantic model back to XML and packages it into a .qdpx zip archive.

    The XML is stored as `project.qde` at the archive root and every entry
    is written with DEFLATE compression.

    Args:
        project: The populated Pydantic Project model.
        file_path: The destination path where the .qdpx file will be saved.
        source_media_dir: (Optional) The local directory containing the source files
            (e.g., PDFs, images) that need to be packaged into the archive's `sources/`
            folder. The directory is walked recursively and each file is stored as
            `sources/<file name>`; sub-directory names are not kept. A path that is
            not an existing directory is silently ignored.

    Raises:
        OSError: If there is an issue writing to the destination path.

    Example:
        ```python
        from pyrefiqda import RefiProject

        RefiProject.save(my_project, "project.qdpx")
        ```
    """
    file_path = Path(file_path)

    # 1. Serialize the Pydantic model back to an XML string
    config = SerializerConfig(xml_declaration=True, encoding="UTF-8")
    serializer = XmlSerializer(config=config)
    xml_string = serializer.render(project)

    # 2. Package it into a .qdpx (zip) file. ZipFile defaults to ZIP_STORED
    #    (no compression at all), so DEFLATE has to be requested explicitly.
    with zipfile.ZipFile(file_path, "w", compression=zipfile.ZIP_DEFLATED) as zip_ref:
        # Write the XML string directly into the zip archive as a .qde file
        zip_ref.writestr("project.qde", xml_string)

        # Package all source files if a directory was provided
        if source_media_dir:
            source_path = Path(source_media_dir)
            if source_path.is_dir():
                for root, _, files in os.walk(source_path):
                    for file in files:
                        local_file = Path(root) / file
                        # Everything lands directly inside 'sources/' in the zip
                        arcname = f"sources/{file}"
                        zip_ref.write(local_file, arcname=arcname)

Working with Codebooks (RefiCodebook)

Handler for standalone REFI-QDA Codebook (.qdc) files.

Unlike .qdpx projects, .qdc files are plain XML files used strictly for exchanging coding hierarchies without media sources.

Source code in pyrefiqda/reficodebook.py
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
class RefiCodebook:
    """Reader/writer for standalone REFI-QDA Codebook (.qdc) files.

    A `.qdc` file is a single plain XML document (no zip archive, no media
    sources), so both helpers work directly on the file path.
    """

    @staticmethod
    def load(file_path: str | Path) -> CodeBook:
        """Reads a `.qdc` XML file and returns it as a CodeBook model.

        Args:
            file_path: Location of the existing `.qdc` codebook file.

        Returns:
            CodeBook: The model produced by parsing the file.

        Raises:
            FileNotFoundError: If nothing exists at `file_path`.

        Example:
            ```python
            from pyrefiqda import RefiCodebook
            codebook = RefiCodebook.load("initial_codes.qdc")
            ```
        """
        codebook_path = Path(file_path)

        # Happy path first: hand the file to the XML parser when it is there.
        if codebook_path.exists():
            return XmlParser().parse(str(codebook_path), CodeBook)

        raise FileNotFoundError(f"Codebook file not found: {codebook_path}")

    @staticmethod
    def save(codebook: CodeBook, file_path: str | Path):
        """Renders a CodeBook model to XML and writes it to a `.qdc` file.

        Args:
            codebook: The populated CodeBook model to serialize.
            file_path: Destination of the `.qdc` file; it is written as UTF-8 text.

        Example:
            ```python
            from pyrefiqda import RefiCodebook
            RefiCodebook.save(my_codebook, "updated_codes.qdc")
            ```
        """
        destination = Path(file_path)
        rendered_xml = XmlSerializer().render(codebook)
        destination.write_text(rendered_xml, encoding="utf-8")

load(file_path) staticmethod

Parses a REFI-QDA Codebook (.qdc) XML file into a Pydantic model.

Parameters:

Name Type Description Default
file_path str | Path

The path to the existing .qdc codebook file.

required

Returns:

Name Type Description
CodeBook CodeBook

The strictly-typed Pydantic CodeBook model.

Raises:

Type Description
FileNotFoundError

If the provided .qdc file does not exist.

Example
from pyrefiqda import RefiCodebook
codebook = RefiCodebook.load("initial_codes.qdc")
Source code in pyrefiqda/reficodebook.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
@staticmethod
def load(file_path: str | Path) -> CodeBook:
    """Parses a REFI-QDA Codebook (.qdc) XML file into a Pydantic model.

    Args:
        file_path: The path to the existing `.qdc` codebook file.

    Returns:
        CodeBook: The strictly-typed Pydantic CodeBook model.

    Raises:
        FileNotFoundError: If the provided `.qdc` file does not exist.

    Example:
        ```python
        from pyrefiqda import RefiCodebook
        codebook = RefiCodebook.load("initial_codes.qdc")
        ```
    """
    file_path = Path(file_path)
    if not file_path.exists():
        raise FileNotFoundError(f"Codebook file not found: {file_path}")

    parser = XmlParser()
    return parser.parse(str(file_path), CodeBook)

save(codebook, file_path) staticmethod

Serializes a CodeBook Pydantic model back to XML and saves it as a .qdc file.

Parameters:

Name Type Description Default
codebook CodeBook

The populated Pydantic CodeBook model.

required
file_path str | Path

The destination path where the .qdc file will be saved.

required
Example
from pyrefiqda import RefiCodebook
RefiCodebook.save(my_codebook, "updated_codes.qdc")
Source code in pyrefiqda/reficodebook.py
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
@staticmethod
def save(codebook: CodeBook, file_path: str | Path):
    """Renders a CodeBook model to XML and writes it to a `.qdc` file.

    Args:
        codebook: The populated CodeBook model to serialize.
        file_path: Destination of the `.qdc` file; it is written as UTF-8 text.

    Example:
        ```python
        from pyrefiqda import RefiCodebook
        RefiCodebook.save(my_codebook, "updated_codes.qdc")
        ```
    """
    destination = Path(file_path)
    rendered_xml = XmlSerializer().render(codebook)
    destination.write_text(rendered_xml, encoding="utf-8")