# Origin: git patch b57d3214f481bc870f716092448b4e3eb3727748
# Author: Aljaž Gerečnik
# Date: Sat, 22 Feb 2025 16:11:00 +0100
# Subject: [PATCH] SR conversion to encapsulated PDF document DICOM object
# Target file: SRToPDF.py (new file)
"""Convert a DICOM Structured Report (SR) into an Encapsulated PDF DICOM object.

Pipeline (see ``main``):

1. Load the mammography image the SR refers to.
2. Extract SCOORD polylines and NUM values from the SR content tree.
3. Draw the polylines and their numeric labels over the image and save a PNG.
4. Build a PDF containing that PNG plus a textual list of the measurements.
5. Wrap the PDF in an Encapsulated PDF Storage DICOM file that reuses the
   patient/study identification of the SR.
"""

import pydicom
from pydicom.dataset import Dataset
from pydicom.dataset import FileMetaDataset
from pydicom.uid import MediaStorageDirectoryStorage, EncapsulatedPDFStorage, generate_uid
import matplotlib.pyplot as plt
from reportlab.pdfgen import canvas


# Code meaning of the top-level SR container whose children hold the
# measurement groups that ``extract_measurements`` walks.
_MEASUREMENTS_CONTAINER = "Imaging Measurements"

# Layout constants (PDF points) used by ``create_pdf``.
_PDF_FONT = ("Helvetica", 12)
_PDF_LINE_STEP = 15
_PDF_BOTTOM_MARGIN = 50
# Start height for continuation pages; chosen to fit on both A4 and Letter.
_PDF_CONTINUATION_TOP = 780

# Attributes copied verbatim from the SR when present; an empty value is
# written when the SR does not carry them, instead of raising AttributeError.
_COPIED_OPTIONAL_KEYWORDS = (
    "PatientName",
    "PatientID",
    "PatientBirthDate",
    "PatientSex",
    "StudyDate",
    "StudyTime",
    "AccessionNumber",
    "ReferringPhysicianName",
    "StudyID",
    "SeriesDate",
    "SeriesTime",
)


def load_mammography_image(dicom_path):
    """Read a DICOM image file.

    Args:
        dicom_path: Path of the DICOM file to read.

    Returns:
        A ``(image, ds)`` tuple: the decoded ``pixel_array`` and the pydicom
        dataset it came from.
    """
    ds = pydicom.dcmread(dicom_path)
    return ds.pixel_array, ds


def extract_measurements(sr_path):
    """Extract measurement annotations from an SR DICOM file.

    Walks three levels of ``ContentSequence`` below every top-level content
    item whose single concept name has the code meaning
    "Imaging Measurements". Items that lack an expected sequence or value
    type are skipped rather than raising.

    Args:
        sr_path: Path of the SR DICOM file.

    Returns:
        A ``(measurements, probabilities)`` tuple. ``measurements`` holds the
        ``GraphicData`` of each SCOORD item; ``probabilities`` holds the
        ``NumericValue`` of each NUM item that has exactly one measured
        value. The two lists are filled independently, so they are not
        guaranteed to have the same length.
    """
    ds = pydicom.dcmread(sr_path)
    measurements = []
    probabilities = []

    for container in ds.get("ContentSequence", []):
        concept = container.get("ConceptNameCodeSequence", [])
        if len(concept) != 1:
            continue
        if concept[0].get("CodeMeaning") != _MEASUREMENTS_CONTAINER:
            continue

        for group in container.get("ContentSequence", []):
            for item in group.get("ContentSequence", []):
                value_type = item.get("ValueType")
                if value_type == "SCOORD":
                    measurements.append(item.GraphicData)
                elif value_type == "NUM":
                    measured = item.get("MeasuredValueSequence", [])
                    if len(measured) == 1:
                        probabilities.append(measured[0].NumericValue)

    return measurements, probabilities


def overlay_measurements(image, measurements, probabilities, output_path="temp.png"):
    """Draw the measurement polylines on the image and save the result.

    Args:
        image: Pixel data accepted by ``Axes.imshow``.
        measurements: Flat coordinate lists ``[x0, y0, x1, y1, ...]``, one per
            polyline.
        probabilities: Numeric labels, matched to ``measurements`` by index.
            A polyline without a matching entry is drawn without a label.
        output_path: File the rendered overlay is written to. Defaults to
            ``"temp.png"``.
    """
    fig, ax = plt.subplots()
    try:
        ax.imshow(image, cmap='gray')

        for index, graphic_data in enumerate(measurements):
            xs = graphic_data[0::2]  # even positions are x-coordinates
            ys = graphic_data[1::2]  # odd positions are y-coordinates
            ax.plot(xs, ys, 'lime', linewidth=1)

            if index >= len(probabilities) or not xs:
                continue
            # The label is anchored at the third-to-last vertex; fall back to
            # the last vertex for polylines that are too short for that.
            anchor = -3 if len(xs) >= 3 else -1
            ax.text(
                xs[anchor] + 100,
                ys[anchor],
                f"{probabilities[index]:.2f} %",
                color='lime',
                fontsize=8,
            )

        ax.axis("off")
        fig.savefig(output_path, bbox_inches='tight', pad_inches=0)
    finally:
        # Always release the figure, even when plotting or saving fails.
        plt.close(fig)


def create_pdf(image_path, measurements, pdf_path):
    """Create a PDF with the overlay image and the extracted measurements.

    The image is drawn at (50, 300) on the first page, followed by one text
    line per measurement. When the list reaches the bottom margin it
    continues on a new page instead of running off the sheet.

    Args:
        image_path: Path of the image to embed.
        measurements: Iterable of values; each is rendered with ``str()``.
        pdf_path: Path of the PDF file to write.
    """
    c = canvas.Canvas(pdf_path)

    c.drawImage(image_path, 50, 300)

    c.setFont(*_PDF_FONT)
    c.drawString(50, 280, "Extracted Measurements:")

    y = 260
    for text in measurements:
        if y < _PDF_BOTTOM_MARGIN:
            c.showPage()
            # showPage() resets the graphics state, including the font.
            c.setFont(*_PDF_FONT)
            y = _PDF_CONTINUATION_TOP
        c.drawString(70, y, f"- {text}")
        y -= _PDF_LINE_STEP

    c.save()


def create_dcm_pdf(sr_path, pdf_path, output_file="output.dcm"):
    """Wrap a PDF file in an Encapsulated PDF Storage DICOM object.

    Patient and study identification are copied from the SR so the new object
    lands in the same study; a new series and SOP instance UID are generated.

    Args:
        sr_path: Path of the SR DICOM file that supplies the identification.
        pdf_path: Path of the PDF file to encapsulate.
        output_file: Path of the DICOM file to write. Defaults to
            ``"output.dcm"``.

    Raises:
        AttributeError: If the SR has no ``StudyInstanceUID``.
    """
    sr = pydicom.dcmread(sr_path)
    ds = Dataset()

    # Keep the SR's character set so copied text values (e.g. a non-ASCII
    # patient name) are encoded the same way in the new object.
    if "SpecificCharacterSet" in sr:
        ds.SpecificCharacterSet = sr.SpecificCharacterSet

    # Patient, study and series attributes; missing ones become empty.
    for keyword in _COPIED_OPTIONAL_KEYWORDS:
        setattr(ds, keyword, sr.get(keyword, ""))

    # Without the study UID the document cannot be attached to the study, so
    # this one is deliberately not defaulted.
    ds.StudyInstanceUID = sr.StudyInstanceUID

    ds.SeriesInstanceUID = generate_uid()
    ds.SeriesNumber = 1
    ds.Modality = "DOC"

    ds.Manufacturer = "MammographyAI"
    ds.ConversionType = "DI"

    ds.SOPInstanceUID = generate_uid()
    ds.SOPClassUID = EncapsulatedPDFStorage

    with open(pdf_path, 'rb') as f:
        pdf_data = f.read()

    # Encapsulated document attributes.
    ds.ContentDate = ds.SeriesDate
    ds.ContentTime = ds.SeriesTime
    ds.AcquisitionDateTime = ""
    ds.InstanceNumber = 1
    ds.BurnedInAnnotation = "YES"
    ds.DocumentTitle = ""
    ds.EncapsulatedDocument = pdf_data
    ds.MIMETypeOfEncapsulatedDocument = "application/pdf"

    # File meta information.
    file_meta = FileMetaDataset()
    file_meta.MediaStorageSOPClassUID = EncapsulatedPDFStorage
    file_meta.MediaStorageSOPInstanceUID = ds.SOPInstanceUID
    file_meta.TransferSyntaxUID = pydicom.uid.ImplicitVRLittleEndian
    file_meta.FileMetaInformationGroupLength = 0
    ds.file_meta = file_meta

    # Dataset encoding must match the transfer syntax chosen above.
    # NOTE(review): pydicom 3.x deprecates these two attributes and
    # ``write_like_original`` in favour of ``save_as(..., implicit_vr=True,
    # little_endian=True, enforce_file_format=True)`` - confirm the targeted
    # pydicom version before switching.
    ds.is_implicit_VR = True  # implicit VR
    ds.is_little_endian = True  # little endian

    # write_like_original=False makes pydicom emit the 128-byte preamble and
    # the "DICM" prefix.
    ds.save_as(output_file, write_like_original=False)

    print(f"DICOM file created: {output_file}")

    # Read the file back so an unreadable result fails here, not downstream.
    pydicom.dcmread(output_file)


def main():
    """Run the example conversion on the sample files in the working directory."""
    mammogram_path = "MG000001.dcm"
    sr_path = "SR000001.dcm"

    image, _ = load_mammography_image(mammogram_path)
    measurements, probabilities = extract_measurements(sr_path)
    overlay_measurements(image, measurements, probabilities)
    create_pdf("temp.png", measurements, "temp.pdf")
    create_dcm_pdf(sr_path, "temp.pdf")


if __name__ == "__main__":
    main()