Chemical Modification Integration Guide for Developers
This guide is for developers who want to integrate chemical modification metadata into their siRNAforge workflows, scripts, or applications.
Quick Start
1. Basic Workflow
from sirnaforge.models.modifications import StrandMetadata, ChemicalModification
from sirnaforge.modifications import save_metadata_json
# Create metadata for your siRNA: a single 2OMe modification on
# positions 1, 3, 5, 7 and 9 of the strand.
ome_modification = ChemicalModification(type="2OMe", positions=[1, 3, 5, 7, 9])
metadata = StrandMetadata(
    id="my_sirna_001",
    sequence="AUCGAUCGAUCGAUCGAUCGA",
    overhang="dTdT",
    chem_mods=[ome_modification],
)

# Save to JSON file, keyed by strand id
save_metadata_json({"my_sirna_001": metadata}, "modifications.json")
2. Load Existing Patterns
from pathlib import Path
import json
# Load example pattern
pattern_file = Path("examples/modification_patterns/standard_2ome.json")
with pattern_file.open() as f:
    pattern = json.load(f)

# Extract modification positions: guide strand -> 2OMe entry -> positions
guide_modifications = pattern["guide_modifications"]
guide_positions = guide_modifications["2OMe"]["positions"]
3. Annotate FASTA Files
from sirnaforge.modifications import merge_metadata_into_fasta
# Merge metadata into FASTA headers.
# Takes the FASTA at `fasta_path` and the metadata JSON at `metadata_path`;
# the third argument names the annotated output file.
merge_metadata_into_fasta(
    fasta_path="candidates.fasta",
    metadata_path="modifications.json",
    output_path="candidates_annotated.fasta"
)
Integration Patterns
Pattern A: Post-Processing (Recommended for Most Cases)
Add modifications AFTER the main design workflow completes.
from sirnaforge.workflow import run_sirna_workflow
from sirnaforge.modifications import save_metadata_json
from sirnaforge.models.modifications import StrandMetadata, ChemicalModification
async def design_with_modifications(gene, output_dir):
    """Run the standard design workflow, then annotate its top candidates.

    Args:
        gene: Gene query forwarded to ``run_sirna_workflow``.
        output_dir: Workflow output directory; the metadata JSON is written
            to ``<output_dir>/sirnaforge/modifications.json``.

    Returns:
        A ``(results, metadata)`` tuple, where ``metadata`` maps
        ``"<candidate id>_guide"`` to the guide strand's ``StrandMetadata``.
    """
    # Step 1: Run standard design
    results = await run_sirna_workflow(
        gene_query=gene,
        output_dir=output_dir,
        top_n_candidates=20,
    )

    # Step 2: Apply modifications to top candidates
    top_candidates = results["design_summary"]["top_candidates"]
    metadata = {}
    for candidate in top_candidates:
        modifications = apply_modification_pattern(
            candidate.guide_sequence,
            pattern="standard_2ome",
        )
        metadata[f"{candidate.id}_guide"] = StrandMetadata(
            id=f"{candidate.id}_guide",
            sequence=candidate.guide_sequence,
            overhang="dTdT",
            chem_mods=modifications,
        )

    # Step 3: Save metadata
    save_metadata_json(
        metadata,
        Path(output_dir) / "sirnaforge" / "modifications.json",
    )
    return results, metadata
def apply_modification_pattern(sequence, pattern="standard_2ome"):
    """Apply a modification pattern to a sequence.

    Positions are 1-based. Only the length of ``sequence`` is used.

    Args:
        sequence: Strand sequence to annotate.
        pattern: ``"standard_2ome"`` marks every odd position;
            ``"minimal"`` marks the last three positions (fewer when the
            sequence is shorter than three nucleotides).

    Returns:
        A list holding one ``2OMe`` ``ChemicalModification``, or an empty
        list when ``pattern`` is not recognized.
    """
    length = len(sequence)
    if pattern == "standard_2ome":
        # Alternating 2'-O-methyl: positions 1, 3, 5, ...
        positions = list(range(1, length + 1, 2))
    elif pattern == "minimal":
        # Terminal positions only. The start is clamped at 1 so a sequence
        # shorter than three nucleotides never yields position 0 or a
        # negative position, which are invalid under 1-based indexing.
        positions = list(range(max(1, length - 2), length + 1))
    else:
        return []
    return [ChemicalModification(type="2OMe", positions=positions)]
Pros:
Clean separation of design and modification logic
No workflow slowdown
Easy to experiment with different patterns
Cons:
Requires separate step
Modifications not considered during scoring
Pattern B: Inline Integration
Add modifications during candidate creation.
from sirnaforge.models.sirna import SiRNACandidate
from sirnaforge.models.modifications import StrandMetadata, ChemicalModification
def create_candidate_with_modifications(
    guide_seq, passenger_seq, position, transcript_id, score
):
    """Build a candidate whose guide strand carries modification metadata.

    The guide strand receives a ``2OMe`` modification on every odd
    (1-based) position and a ``dTdT`` overhang. ``score`` is stored as the
    candidate's composite score; the asymmetry score is a fixed 0.75.
    """
    # Odd 1-based positions: 1, 3, 5, ...
    odd_positions = list(range(1, len(guide_seq) + 1, 2))
    alternating_2ome = ChemicalModification(type="2OMe", positions=odd_positions)

    # Create modification metadata
    guide_metadata = StrandMetadata(
        id=f"candidate_{position}_guide",
        sequence=guide_seq,
        overhang="dTdT",
        chem_mods=[alternating_2ome],
    )

    # Create candidate with metadata
    candidate = SiRNACandidate(
        id=f"candidate_{position}",
        transcript_id=transcript_id,
        position=position,
        guide_sequence=guide_seq,
        passenger_sequence=passenger_seq,
        gc_content=calculate_gc_content(guide_seq),
        length=len(guide_seq),
        asymmetry_score=0.75,
        composite_score=score,
        guide_metadata=guide_metadata,  # Embedded metadata
    )
    return candidate
Pros:
Metadata travels with candidate
Available in to_fasta() output
Single data structure
Cons:
More memory usage
Harder to experiment with patterns later
Pattern C: Hybrid (Best for Flexibility)
Store minimal info during design, enrich during export.
class ModificationAnnotator:
    """Helper class for adding modifications to candidates.

    Patterns are read once, at construction time, from the ``*.json`` files
    directly inside the library directory (sub-directories are not
    searched) and are looked up by their ``pattern_name`` field.
    """

    def __init__(self, pattern_library_path):
        """Load every pattern found in ``pattern_library_path``."""
        self.patterns = self._load_patterns(pattern_library_path)

    def _load_patterns(self, path):
        """Load modification patterns from JSON.

        Files whose top-level value is not an object, or that lack a
        ``pattern_name`` key, are ignored.

        Returns:
            Dict mapping pattern name to the parsed JSON document.
        """
        patterns = {}
        # Sorted so that, when two files declare the same pattern_name, the
        # one that wins does not depend on directory listing order.
        for pattern_file in sorted(Path(path).glob("*.json")):
            # JSON is UTF-8 on disk; do not rely on the locale default.
            with pattern_file.open(encoding="utf-8") as f:
                data = json.load(f)
            if isinstance(data, dict) and "pattern_name" in data:
                patterns[data["pattern_name"]] = data
        return patterns

    def annotate_candidates(
        self,
        candidates: "list[SiRNACandidate]",
        pattern_name: str = "standard_2ome",
    ) -> "dict[str, StrandMetadata]":
        """Apply modification pattern to list of candidates.

        Args:
            candidates: Candidates providing ``id`` and ``guide_sequence``.
            pattern_name: Name of a loaded pattern.

        Returns:
            Dict mapping ``"<candidate id>_guide"`` to the guide metadata.

        Raises:
            ValueError: If ``pattern_name`` is not in the loaded library.
        """
        pattern = self.patterns.get(pattern_name)
        if not pattern:
            raise ValueError(f"Pattern {pattern_name} not found")

        metadata = {}
        for candidate in candidates:
            # Create guide metadata
            metadata[f"{candidate.id}_guide"] = self._create_metadata_from_pattern(
                candidate.guide_sequence,
                candidate.id,
                pattern["guide_modifications"],
            )
        return metadata

    def _create_metadata_from_pattern(self, sequence, sid, pattern_spec):
        """Create StrandMetadata from pattern specification.

        ``pattern_spec`` maps a modification type to a dict with optional
        ``strategy`` (default ``"custom"``) and ``positions`` (default
        empty) entries. The resulting strand id is ``"<sid>_guide"``.
        """
        modifications = [
            ChemicalModification(
                type=mod_type,
                positions=self._apply_position_strategy(
                    sequence,
                    spec.get("strategy", "custom"),
                    spec.get("positions", []),
                ),
            )
            for mod_type, spec in pattern_spec.items()
        ]
        return StrandMetadata(
            id=f"{sid}_guide",
            sequence=sequence,
            overhang="dTdT",
            chem_mods=modifications,
        )

    def _apply_position_strategy(self, sequence, strategy, positions):
        """Calculate 1-based positions based on strategy.

        Args:
            sequence: Strand sequence; only its length is used.
            strategy: ``"alternating"`` (odd positions), ``"terminal_3prime"``
                (last three positions, fewer for very short sequences) or
                ``"custom"`` (use ``positions`` as given).
            positions: Explicit positions for the ``"custom"`` strategy.

        Returns:
            A new list of positions; empty for an unknown strategy.
        """
        length = len(sequence)
        if strategy == "alternating":
            return list(range(1, length + 1, 2))
        if strategy == "terminal_3prime":
            # Clamp at 1: sequences shorter than three nucleotides must not
            # produce position 0 or negative positions.
            return list(range(max(1, length - 2), length + 1))
        if strategy == "custom":
            # Copy so callers never share the list stored in self.patterns.
            return list(positions)
        return []
# Usage
# Build the annotator from the example pattern directory, then annotate the
# top candidates with the "standard_2ome" pattern.
# NOTE(review): `design_results` is not defined in this guide; presumably it
# is the result object of an earlier design run — confirm against your code.
annotator = ModificationAnnotator("examples/modification_patterns")
metadata = annotator.annotate_candidates(
    candidates=design_results.top_candidates,
    pattern_name="standard_2ome"
)
Pros:
Flexible pattern management
Easy to add new patterns
Clean API
Cons:
More code to maintain
Requires pattern library
Best Practices
1. Validation
Always validate modification positions against sequence length:
from pydantic import ValidationError
# The sequence below has 8 nucleotides, so position 10 is out of range;
# this example catches the resulting ValidationError and reports it.
try:
    out_of_range = ChemicalModification(
        type="2OMe",
        positions=[1, 2, 10],  # Position 10 > 8!
    )
    metadata = StrandMetadata(
        id="test",
        sequence="AUCGAUCG",  # 8 nucleotides
        chem_mods=[out_of_range],
    )
except ValidationError as e:
    print(f"Invalid modification: {e}")
2. Pattern Library Organization
Organize patterns by use case:
modification_patterns/
├── cost_optimized/
│   ├── minimal.json
│   └── basic_terminal.json
├── standard/
│   ├── alternating_2ome.json
│   └── balanced.json
├── therapeutic/
│   ├── maximal_stability.json
│   └── fda_approved_onpattro.json
└── custom/
    └── my_pattern.json
3. Provenance Tracking
Always include provenance for traceability:
from sirnaforge.models.modifications import Provenance, SourceType
# Attach a Provenance record to the strand so its origin can be traced.
# NOTE(review): `version`, `gene` and `timestamp` are not defined in this
# snippet; the surrounding code must supply them.
metadata = StrandMetadata(
    id="my_candidate",
    sequence="AUCGAUCGAUCGAUCGAUCGA",
    provenance=Provenance(
        source_type=SourceType.DESIGNED,
        identifier=f"sirnaforge_v{version}_{gene}_{timestamp}",
        url="https://your-lab.org/experiments/exp-123"
    )
)
4. Version Control
Store modification metadata in version control alongside code:
git add modifications.json
git commit -m "Add modification pattern for TP53 candidates"
git tag -a "v1.0-modifications" -m "First modification pattern set"
5. Testing
Test your modification integration:
def test_modification_application():
    """Test that modifications are correctly applied."""
    sequence = "AUCGAUCGAUCGAUCGAUCGA"

    # Apply pattern
    # NOTE(review): `apply_pattern` is not defined in this guide; it is
    # expected to return an object exposing `chem_mods` and `sequence`.
    metadata = apply_pattern(sequence, "standard_2ome")

    # Validate
    assert len(metadata.chem_mods) > 0
    assert metadata.sequence == sequence
    # Positions are 1-based, so check both ends of the range: an upper-bound
    # check alone would let an invalid position 0 slip through.
    for mod in metadata.chem_mods:
        for pos in mod.positions:
            assert 1 <= pos <= len(sequence), (
                f"{mod.type} position {pos} outside 1..{len(sequence)}"
            )
Common Use Cases
Use Case 1: Batch Annotation
Annotate multiple FASTA files with the same pattern:
from pathlib import Path
def batch_annotate(input_dir, pattern_name="standard_2ome"):
    """Annotate all FASTA files in directory.

    For every ``<name>.fasta`` directly inside ``input_dir`` this writes
    ``<name>.json`` (the metadata) and ``<name>_annotated.fasta`` next to
    it. Files whose stem already ends in ``_annotated`` are skipped, so
    running the function again does not re-annotate its own output.

    Args:
        input_dir: Directory containing the FASTA files.
        pattern_name: Pattern passed to ``create_metadata_from_fasta``.
    """
    annotated_suffix = "_annotated"

    # Snapshot and sort the listing before writing anything: the annotated
    # outputs land in the same directory and also match "*.fasta".
    fasta_files = sorted(Path(input_dir).glob("*.fasta"))

    for fasta_file in fasta_files:
        if fasta_file.stem.endswith(annotated_suffix):
            continue

        # Create metadata for sequences
        metadata = create_metadata_from_fasta(fasta_file, pattern_name)

        # Save JSON
        json_path = fasta_file.with_suffix(".json")
        save_metadata_json(metadata, json_path)

        # Create annotated FASTA
        output_path = fasta_file.with_name(
            f"{fasta_file.stem}{annotated_suffix}.fasta"
        )
        merge_metadata_into_fasta(fasta_file, json_path, output_path)
Use Case 2: Synthesis Planning
Generate synthesis order from annotated candidates:
def generate_synthesis_order(metadata_dict, vendor="IDT"):
    """Generate synthesis order format for vendor.

    Args:
        metadata_dict: Mapping of strand id to an object exposing
            ``sequence`` and ``chem_mods`` (each modification exposing
            ``type`` and 1-based ``positions``).
        vendor: Vendor name. Currently unused: every order gets the same
            layout, a ``"25nm"`` scale and ``"HPLC"`` purification.

    Returns:
        A list with one dict per strand, holding ``name``, ``sequence``,
        ``scale``, ``purification`` and a ``modifications`` list of
        ``{"position", "type", "base"}`` dicts.

    Raises:
        IndexError: If a modification position lies outside
            ``1..len(sequence)``.
    """
    order = []
    for strand_id, metadata in metadata_dict.items():
        sequence = metadata.sequence
        modifications = []

        # Format modifications for vendor
        for mod in metadata.chem_mods:
            for pos in mod.positions:
                # Positions are 1-based. Without this check, 0 or a negative
                # position would silently pick a base from the end of the
                # sequence through Python's negative indexing.
                if not 1 <= pos <= len(sequence):
                    raise IndexError(
                        f"position {pos} of {mod.type} is outside "
                        f"1..{len(sequence)} for strand {strand_id}"
                    )
                modifications.append({
                    "position": pos,
                    "type": mod.type,
                    "base": sequence[pos - 1],
                })

        order.append({
            "name": strand_id,
            "sequence": sequence,
            "scale": "25nm",
            "purification": "HPLC",
            "modifications": modifications,
        })
    return order
Use Case 3: Cost Estimation
Estimate synthesis costs based on modifications:
def estimate_cost(metadata: StrandMetadata) -> float:
    """Estimate synthesis cost based on modifications.

    The estimate is a flat base cost, plus a per-position charge for each
    modification (by modification type), plus a flat HPLC purification fee.
    """
    # Cost per modified position, by modification type
    price_per_position = {"2OMe": 20.0, "2F": 30.0, "PS": 25.0, "LNA": 50.0}
    unlisted_type_price = 30.0

    # Start from the base cost for an unmodified 21-mer and add each
    # modification's charge in order.
    synthesis_cost = sum(
        (
            len(mod.positions) * price_per_position.get(mod.type, unlisted_type_price)
            for mod in metadata.chem_mods
        ),
        300.0,
    )

    # HPLC purification
    return synthesis_cost + 150.0
Troubleshooting
Issue: Metadata not appearing in FASTA headers
Solution: Ensure you're using merge_metadata_into_fasta():
# Wrong - this won't add metadata
shutil.copy(input_fasta, output_fasta)
# Right - this merges metadata
merge_metadata_into_fasta(input_fasta, metadata_json, output_fasta)
Issue: Validation errors for positions
Solution: Check sequence length and 1-based indexing:
sequence = "AUCGAUCG" # 8 nucleotides
positions = [1, 2, 3, 8] # Valid (1-based, all <= 8)
positions = [0, 1, 2] # Invalid (0 is not allowed)
positions = [1, 2, 9] # Invalid (9 > 8)
Issue: Pattern not found
Solution: Verify pattern file exists and has correct structure:
from pathlib import Path
import json
# Report the pattern's declared name, or say that the file is missing.
pattern_file = Path("examples/modification_patterns/my_pattern.json")
if pattern_file.exists():
    with pattern_file.open() as f:
        data = json.load(f)
    print(f"Pattern name: {data.get('pattern_name')}")
else:
    print(f"Pattern file not found: {pattern_file}")
Advanced Topics
Custom Modification Types
You can use any modification type string:
# Standard modifications
ChemicalModification(type="2OMe", positions=[1, 3, 5])
# Proprietary modifications
ChemicalModification(type="CustomMod_v2", positions=[7, 14, 21])
# Delivery conjugates
ChemicalModification(type="GalNAc", positions=[21]) # Conjugated at 3' end
Multi-Strand Complexes
For siRNAs with guide and passenger:
# One StrandMetadata per strand of the duplex, keyed by strand id.
# NOTE(review): `guide_seq`, `passenger_seq`, `guide_positions` and
# `passenger_positions` are not defined in this snippet; supply your own.
duplex_metadata = {
    "sirna_001_guide": StrandMetadata(
        id="sirna_001_guide",
        sequence=guide_seq,
        chem_mods=[ChemicalModification(type="2OMe", positions=guide_positions)]
    ),
    "sirna_001_passenger": StrandMetadata(
        id="sirna_001_passenger",
        sequence=passenger_seq,
        chem_mods=[ChemicalModification(type="2OMe", positions=passenger_positions)]
    )
}
Integration with Workflow Outputs
Add modifications to workflow outputs:
# After workflow completes: locate the workflow's output files
output_dir = Path("results/TP53")
sirnaforge_dir = output_dir / "sirnaforge"
candidates_csv = sirnaforge_dir / "TP53_pass.csv"
candidates_fasta = sirnaforge_dir / "TP53_pass.fasta"

# Load candidates from CSV
df = pd.read_csv(candidates_csv)

# Create metadata for top candidates (first 10 rows), keyed by candidate id
top_rows = df.head(10)
metadata = {}
for _, row in top_rows.iterrows():
    metadata[row['id']] = create_metadata_for_candidate(row)

# Save and annotate
json_path = sirnaforge_dir / "modifications.json"
save_metadata_json(metadata, json_path)
annotated_fasta = sirnaforge_dir / "TP53_pass_annotated.fasta"
merge_metadata_into_fasta(candidates_fasta, json_path, annotated_fasta)
References
Specification: docs/modification_annotation_spec.md
API Reference: docs/api_reference.rst
Examples: examples/chemical_modifications_example.py
Tests: tests/unit/test_modifications.py
Last Updated: 2025-10-24
Maintainer: siRNAforge Development Team