Spaces:

rohitium
/

chest-xray-classification

Running

App Files Files Community

rohitium committed 3 days ago

Commit

b412062

0 Parent(s):

Deploy Chest X-Ray App (LFS)

Browse files

Files changed (24) hide show

.gitattributes +4 -0
.gitignore +45 -0
.gradio/certificate.pem +31 -0
README.md +28 -0
data/Chest_Xray_PA_3-8-2010.png +3 -0
data/cxr14_subset_labels.csv +0 -0
data/large-pneumothorax-5.jpeg +3 -0
data/precomputed_image_embeddings.npz +3 -0
data/precomputed_text_embeddings.npz +3 -0
data/test_xray.png +3 -0
requirements.txt +13 -0
results/kaggle_predictions.csv +251 -0
results/kaggle_roc_curve.png +3 -0
results/roc_PNEUMOTHORAX.png +3 -0
src/app.py +110 -0
src/calculate_threshold.py +54 -0
src/create_dummy_image.py +13 -0
src/dicom_utils.py +49 -0
src/download.py +43 -0
src/evaluate.py +43 -0
src/evaluate_kaggle.py +142 -0
src/main.py +106 -0
src/model.py +171 -0
src/plot_kaggle_roc.py +62 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,4 @@

+*.npz filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
+*.jpeg filter=lfs diff=lfs merge=lfs -text
+*.jpg filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,45 @@

+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+# MacOS
+.DS_Store
+# Virtual Env
+venv/
+miniconda3/
+# Project Data
+# Exclude the large Kaggle dataset
+data/kaggle/
+# Raw image downloads are NOT excluded at the moment: the patterns below are commented out (precomputed embeddings are kept either way)
+# data/*.png
+# data/*.jpg
+# data/*.jpeg
+# data/*.dcm
+# But FORCE include the necessary precomputed files and results for the app
+!data/precomputed_text_embeddings.npz
+!data/cxr14_subset_labels.csv
+!results/kaggle_roc_curve.png
+!results/roc_PNEUMOTHORAX.png
+!data/google-health/

.gradio/certificate.pem ADDED Viewed

	@@ -0,0 +1,31 @@

+-----BEGIN CERTIFICATE-----
+MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
+TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
+cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
+WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
+ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
+MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
+h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
+0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
+A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
+T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
+B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
+B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
+KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
+OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
+jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
+qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
+rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
+HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
+hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
+ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
+3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
+NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
+ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
+TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
+jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
+oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
+4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
+mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
+emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
+-----END CERTIFICATE-----

README.md ADDED Viewed

	@@ -0,0 +1,28 @@

+---
+title: Chest X-Ray Zero-Shot Classifier
+emoji: 🩻
+colorFrom: blue
+colorTo: indigo
+sdk: gradio
+sdk_version: 5.47.2
+app_file: src/app.py
+pinned: false
+license: mit
+---
+# Chest X-Ray Zero-Shot Classifier
+This application uses the **Google CXR Foundation** model to perform zero-shot classification of Chest X-Rays for **Pneumothorax**.
+## Detection Logic
+- **Model**: `google/cxr-foundation` (ELIXR-C Image Encoder + QFormer)
+- **Method**: Zero-Shot Classification comparing image embeddings to text embeddings ("small pneumothorax" vs "no pneumothorax").
+- **Binary Threshold**: `-0.1173` (Calibrated on a local Kaggle Pneumothorax dataset using Youden's J statistic).
+## Performance
+- **Local Kaggle Dataset AUC**: 0.8804
+## How to use
+1. Upload a valid chest X-ray image (PNG or JPG; convert DICOM files to PNG/JPG first).
+2. Click "Analyze Image".
+3. View the prediction and the raw zero-shot score.

data/Chest_Xray_PA_3-8-2010.png ADDED Viewed

Git LFS Details

SHA256: a43431b089a6dfab031c10eceef6c821e4f326913bf9015f73f4a1f0446d2be1
Pointer size: 132 Bytes
Size of remote file: 4.2 MB

data/cxr14_subset_labels.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

data/large-pneumothorax-5.jpeg ADDED Viewed

Git LFS Details

SHA256: 3d20720fa374684a4b003c2b2ebb5a9cb3de487120cafdc6bdc355924d8bbc49
Pointer size: 132 Bytes
Size of remote file: 1.88 MB

data/precomputed_image_embeddings.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1c307ffb4f109160e1e64780e4c8522642d6581badb593d94717634c4a76574e
+size 45543702

data/precomputed_text_embeddings.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3de8680df438bf1adda19f8229179aecfc43c050771bb2582af4964fb76fa1d6
+size 931906

data/test_xray.png ADDED Viewed

Git LFS Details

SHA256: 9fd4ba373873ce679c0bdd72792ce2b3d0c59b8c4be1f9e66c76fe5de280cbac
Pointer size: 131 Bytes
Size of remote file: 525 kB

requirements.txt ADDED Viewed

	@@ -0,0 +1,13 @@

+pandas
+numpy
+scikit-learn
+matplotlib
+wget
+huggingface_hub
+pypng
+Pillow
+tensorflow-text; sys_platform != 'win32'
+# tensorflow-metal; sys_platform == 'darwin' and platform_machine == 'arm64' # Disabled to avoid conflict with tensorflow-text
+tensorflow
+pydicom
+gradio

results/kaggle_predictions.csv ADDED Viewed

	@@ -0,0 +1,251 @@

+file,true_label,pneumothorax_score
+train/No Pneumothorax/000000.dcm,No Pneumothorax,-0.10425025224685669
+train/Pneumothorax/000001.dcm,Pneumothorax,-0.0685623288154602
+train/No Pneumothorax/000002.dcm,No Pneumothorax,-0.16316622495651245
+train/Pneumothorax/000003.dcm,Pneumothorax,-0.09676074981689453
+train/Pneumothorax/000004.dcm,Pneumothorax,-0.1509198546409607
+train/No Pneumothorax/000005.dcm,No Pneumothorax,-0.1722637414932251
+train/No Pneumothorax/000006.dcm,No Pneumothorax,-0.10494697093963623
+train/No Pneumothorax/000007.dcm,No Pneumothorax,-0.21636486053466797
+train/No Pneumothorax/000008.dcm,No Pneumothorax,-0.20555296540260315
+train/No Pneumothorax/000009.dcm,No Pneumothorax,-0.21164917945861816
+train/Pneumothorax/000010.dcm,Pneumothorax,-0.0807306170463562
+train/No Pneumothorax/000011.dcm,No Pneumothorax,-0.18297719955444336
+train/No Pneumothorax/000012.dcm,No Pneumothorax,-0.20279890298843384
+train/Pneumothorax/000013.dcm,Pneumothorax,-0.13381218910217285
+train/No Pneumothorax/000014.dcm,No Pneumothorax,-0.15488970279693604
+train/No Pneumothorax/000015.dcm,No Pneumothorax,-0.11352282762527466
+train/Pneumothorax/000016.dcm,Pneumothorax,-0.15156656503677368
+train/No Pneumothorax/000017.dcm,No Pneumothorax,-0.12871450185775757
+train/No Pneumothorax/000018.dcm,No Pneumothorax,-0.1893666386604309
+train/No Pneumothorax/000019.dcm,No Pneumothorax,-0.19539695978164673
+train/No Pneumothorax/000020.dcm,No Pneumothorax,-0.14108973741531372
+train/Pneumothorax/000021.dcm,Pneumothorax,-0.1235361099243164
+train/No Pneumothorax/000022.dcm,No Pneumothorax,-0.18652945756912231
+train/No Pneumothorax/000023.dcm,No Pneumothorax,-0.217128723859787
+train/No Pneumothorax/000024.dcm,No Pneumothorax,-0.17158114910125732
+train/No Pneumothorax/000025.dcm,No Pneumothorax,-0.15493667125701904
+train/No Pneumothorax/000026.dcm,No Pneumothorax,-0.19961446523666382
+train/No Pneumothorax/000027.dcm,No Pneumothorax,-0.046631813049316406
+train/No Pneumothorax/000028.dcm,No Pneumothorax,-0.13155603408813477
+train/Pneumothorax/000029.dcm,Pneumothorax,-0.08408623933792114
+train/No Pneumothorax/000030.dcm,No Pneumothorax,-0.1320541501045227
+train/No Pneumothorax/000031.dcm,No Pneumothorax,-0.20212656259536743
+train/No Pneumothorax/000032.dcm,No Pneumothorax,-0.1174507737159729
+train/No Pneumothorax/000033.dcm,No Pneumothorax,-0.22716906666755676
+train/No Pneumothorax/000034.dcm,No Pneumothorax,-0.19766950607299805
+train/No Pneumothorax/000035.dcm,No Pneumothorax,-0.14044338464736938
+train/No Pneumothorax/000036.dcm,No Pneumothorax,-0.20672184228897095
+train/No Pneumothorax/000037.dcm,No Pneumothorax,-0.12445247173309326
+train/Pneumothorax/000038.dcm,Pneumothorax,-0.0037876367568969727
+train/No Pneumothorax/000039.dcm,No Pneumothorax,-0.10316938161849976
+train/No Pneumothorax/000040.dcm,No Pneumothorax,-0.17113035917282104
+train/No Pneumothorax/000041.dcm,No Pneumothorax,-0.19905823469161987
+train/Pneumothorax/000042.dcm,Pneumothorax,-0.16009891033172607
+train/Pneumothorax/000043.dcm,Pneumothorax,-0.03364861011505127
+train/No Pneumothorax/000044.dcm,No Pneumothorax,-0.09765869379043579
+train/No Pneumothorax/000045.dcm,No Pneumothorax,-0.1575358510017395
+train/Pneumothorax/000046.dcm,Pneumothorax,-0.07842230796813965
+train/No Pneumothorax/000047.dcm,No Pneumothorax,-0.11831718683242798
+train/No Pneumothorax/000048.dcm,No Pneumothorax,-0.13761907815933228
+train/Pneumothorax/000049.dcm,Pneumothorax,-0.07772612571716309
+train/Pneumothorax/000050.dcm,Pneumothorax,-0.06707853078842163
+train/No Pneumothorax/000051.dcm,No Pneumothorax,-0.14870381355285645
+train/Pneumothorax/000052.dcm,Pneumothorax,-0.14959675073623657
+train/No Pneumothorax/000053.dcm,No Pneumothorax,-0.10760456323623657
+train/No Pneumothorax/000054.dcm,No Pneumothorax,-0.12791478633880615
+train/Pneumothorax/000055.dcm,Pneumothorax,-0.08234262466430664
+train/No Pneumothorax/000056.dcm,No Pneumothorax,-0.17744576930999756
+train/No Pneumothorax/000057.dcm,No Pneumothorax,-0.09609729051589966
+train/Pneumothorax/000058.dcm,Pneumothorax,-0.11940544843673706
+train/No Pneumothorax/000059.dcm,No Pneumothorax,-0.14210587739944458
+train/Pneumothorax/000060.dcm,Pneumothorax,-0.10487139225006104
+train/Pneumothorax/000061.dcm,Pneumothorax,-0.09002417325973511
+train/No Pneumothorax/000062.dcm,No Pneumothorax,-0.12446761131286621
+train/No Pneumothorax/000063.dcm,No Pneumothorax,-0.14532139897346497
+train/Pneumothorax/000064.dcm,Pneumothorax,-0.056851863861083984
+train/Pneumothorax/000065.dcm,Pneumothorax,-0.0926550030708313
+train/No Pneumothorax/000066.dcm,No Pneumothorax,-0.1890377700328827
+train/No Pneumothorax/000067.dcm,No Pneumothorax,-0.153730571269989
+train/Pneumothorax/000068.dcm,Pneumothorax,-0.10636574029922485
+train/No Pneumothorax/000069.dcm,No Pneumothorax,-0.15283668041229248
+train/Pneumothorax/000070.dcm,Pneumothorax,-0.06824761629104614
+train/No Pneumothorax/000071.dcm,No Pneumothorax,-0.13752543926239014
+train/No Pneumothorax/000072.dcm,No Pneumothorax,-0.17813771963119507
+train/No Pneumothorax/000073.dcm,No Pneumothorax,-0.09899967908859253
+train/No Pneumothorax/000074.dcm,No Pneumothorax,-0.17760634422302246
+train/No Pneumothorax/000075.dcm,No Pneumothorax,-0.1572226881980896
+train/Pneumothorax/000076.dcm,Pneumothorax,-0.14221316576004028
+train/No Pneumothorax/000077.dcm,No Pneumothorax,-0.2066076397895813
+train/No Pneumothorax/000078.dcm,No Pneumothorax,-0.12766695022583008
+train/No Pneumothorax/000079.dcm,No Pneumothorax,-0.19809648394584656
+train/No Pneumothorax/000080.dcm,No Pneumothorax,-0.10679352283477783
+train/Pneumothorax/000081.dcm,Pneumothorax,-0.09845387935638428
+train/No Pneumothorax/000082.dcm,No Pneumothorax,-0.17694079875946045
+train/Pneumothorax/000083.dcm,Pneumothorax,-0.13586944341659546
+train/Pneumothorax/000084.dcm,Pneumothorax,-0.058757483959198
+train/No Pneumothorax/000085.dcm,No Pneumothorax,-0.13550400733947754
+train/Pneumothorax/000086.dcm,Pneumothorax,-0.0947638750076294
+train/No Pneumothorax/000087.dcm,No Pneumothorax,-0.09306931495666504
+train/No Pneumothorax/000088.dcm,No Pneumothorax,-0.15457135438919067
+train/No Pneumothorax/000089.dcm,No Pneumothorax,-0.1434255838394165
+train/No Pneumothorax/000090.dcm,No Pneumothorax,-0.15700900554656982
+train/No Pneumothorax/000091.dcm,No Pneumothorax,-0.1415807604789734
+train/No Pneumothorax/000092.dcm,No Pneumothorax,-0.1890210211277008
+train/No Pneumothorax/000093.dcm,No Pneumothorax,-0.12824022769927979
+train/No Pneumothorax/000094.dcm,No Pneumothorax,-0.20132136344909668
+train/No Pneumothorax/000095.dcm,No Pneumothorax,-0.21727478504180908
+train/No Pneumothorax/000096.dcm,No Pneumothorax,-0.1915437877178192
+train/No Pneumothorax/000097.dcm,No Pneumothorax,-0.1858217716217041
+train/No Pneumothorax/000098.dcm,No Pneumothorax,-0.1721271276473999
+train/No Pneumothorax/000099.dcm,No Pneumothorax,-0.16763722896575928
+train/No Pneumothorax/000100.dcm,No Pneumothorax,-0.17117935419082642
+train/Pneumothorax/000101.dcm,Pneumothorax,-0.11121219396591187
+train/No Pneumothorax/000102.dcm,No Pneumothorax,-0.1796356439590454
+train/No Pneumothorax/000103.dcm,No Pneumothorax,-0.21206533908843994
+train/No Pneumothorax/000104.dcm,No Pneumothorax,-0.16594678163528442
+train/No Pneumothorax/000105.dcm,No Pneumothorax,-0.13096767663955688
+train/Pneumothorax/000106.dcm,Pneumothorax,-0.10808700323104858
+train/Pneumothorax/000107.dcm,Pneumothorax,-0.054500699043273926
+train/No Pneumothorax/000108.dcm,No Pneumothorax,-0.17718034982681274
+train/No Pneumothorax/000109.dcm,No Pneumothorax,-0.18512558937072754
+train/No Pneumothorax/000110.dcm,No Pneumothorax,-0.14153480529785156
+train/Pneumothorax/000111.dcm,Pneumothorax,-0.07206696271896362
+train/No Pneumothorax/000112.dcm,No Pneumothorax,-0.15524542331695557
+train/No Pneumothorax/000113.dcm,No Pneumothorax,-0.049727559089660645
+train/No Pneumothorax/000114.dcm,No Pneumothorax,-0.13511207699775696
+train/No Pneumothorax/000115.dcm,No Pneumothorax,-0.12422651052474976
+train/No Pneumothorax/000116.dcm,No Pneumothorax,-0.18628454208374023
+train/Pneumothorax/000117.dcm,Pneumothorax,-0.10177350044250488
+train/No Pneumothorax/000118.dcm,No Pneumothorax,-0.21400070190429688
+train/No Pneumothorax/000119.dcm,No Pneumothorax,-0.1408158540725708
+train/Pneumothorax/000120.dcm,Pneumothorax,0.0021948814392089844
+train/No Pneumothorax/000121.dcm,No Pneumothorax,-0.19585365056991577
+train/No Pneumothorax/000122.dcm,No Pneumothorax,-0.21072477102279663
+train/No Pneumothorax/000123.dcm,No Pneumothorax,-0.1782313585281372
+train/No Pneumothorax/000124.dcm,No Pneumothorax,-0.11618930101394653
+train/Pneumothorax/000125.dcm,Pneumothorax,-0.09667110443115234
+train/Pneumothorax/000126.dcm,Pneumothorax,-0.1602795124053955
+train/No Pneumothorax/000127.dcm,No Pneumothorax,-0.19403642416000366
+train/No Pneumothorax/000128.dcm,No Pneumothorax,-0.09534329175949097
+train/Pneumothorax/000129.dcm,Pneumothorax,-0.060146450996398926
+train/Pneumothorax/000130.dcm,Pneumothorax,-0.07106363773345947
+train/No Pneumothorax/000131.dcm,No Pneumothorax,-0.181549072265625
+train/No Pneumothorax/000132.dcm,No Pneumothorax,-0.16202104091644287
+train/No Pneumothorax/000133.dcm,No Pneumothorax,-0.1954769492149353
+train/Pneumothorax/000134.dcm,Pneumothorax,-0.11729413270950317
+train/No Pneumothorax/000135.dcm,No Pneumothorax,-0.18572622537612915
+train/No Pneumothorax/000136.dcm,No Pneumothorax,-0.17218655347824097
+train/No Pneumothorax/000137.dcm,No Pneumothorax,-0.19926214218139648
+train/No Pneumothorax/000138.dcm,No Pneumothorax,-0.19930341839790344
+train/Pneumothorax/000139.dcm,Pneumothorax,-0.1775619387626648
+train/Pneumothorax/000140.dcm,Pneumothorax,-0.09699171781539917
+train/No Pneumothorax/000141.dcm,No Pneumothorax,-0.1945207715034485
+train/Pneumothorax/000142.dcm,Pneumothorax,-0.05407905578613281
+train/No Pneumothorax/000143.dcm,No Pneumothorax,-0.14209693670272827
+train/Pneumothorax/000144.dcm,Pneumothorax,-0.06982123851776123
+train/Pneumothorax/000145.dcm,Pneumothorax,-0.13382339477539062
+train/No Pneumothorax/000146.dcm,No Pneumothorax,-0.19120937585830688
+train/No Pneumothorax/000147.dcm,No Pneumothorax,-0.15216165781021118
+train/No Pneumothorax/000148.dcm,No Pneumothorax,-0.20141980051994324
+train/No Pneumothorax/000149.dcm,No Pneumothorax,-0.20271122455596924
+train/No Pneumothorax/000150.dcm,No Pneumothorax,-0.16529840230941772
+train/Pneumothorax/000151.dcm,Pneumothorax,-0.15329903364181519
+train/Pneumothorax/000152.dcm,Pneumothorax,-0.08588516712188721
+train/Pneumothorax/000153.dcm,Pneumothorax,-0.15394580364227295
+train/Pneumothorax/000154.dcm,Pneumothorax,-0.03996264934539795
+train/Pneumothorax/000155.dcm,Pneumothorax,-0.13664811849594116
+train/No Pneumothorax/000156.dcm,No Pneumothorax,-0.16998988389968872
+train/Pneumothorax/000157.dcm,Pneumothorax,-0.09838944673538208
+train/Pneumothorax/000158.dcm,Pneumothorax,-0.1137932538986206
+train/No Pneumothorax/000159.dcm,No Pneumothorax,-0.16903436183929443
+train/No Pneumothorax/000160.dcm,No Pneumothorax,-0.19102925062179565
+train/Pneumothorax/000161.dcm,Pneumothorax,-0.13560515642166138
+train/No Pneumothorax/000162.dcm,No Pneumothorax,-0.21745437383651733
+train/No Pneumothorax/000163.dcm,No Pneumothorax,-0.16950178146362305
+train/No Pneumothorax/000164.dcm,No Pneumothorax,-0.06001162528991699
+train/No Pneumothorax/000165.dcm,No Pneumothorax,-0.15253078937530518
+train/No Pneumothorax/000166.dcm,No Pneumothorax,-0.2072521150112152
+train/No Pneumothorax/000167.dcm,No Pneumothorax,-0.1807345747947693
+train/No Pneumothorax/000168.dcm,No Pneumothorax,-0.1796715259552002
+train/No Pneumothorax/000169.dcm,No Pneumothorax,-0.12789452075958252
+train/Pneumothorax/000170.dcm,Pneumothorax,-0.10478848218917847
+train/No Pneumothorax/000171.dcm,No Pneumothorax,-0.15731322765350342
+train/No Pneumothorax/000172.dcm,No Pneumothorax,-0.12667322158813477
+train/Pneumothorax/000173.dcm,Pneumothorax,-0.16081935167312622
+train/No Pneumothorax/000174.dcm,No Pneumothorax,-0.14709264039993286
+train/No Pneumothorax/000175.dcm,No Pneumothorax,-0.1776459813117981
+train/No Pneumothorax/000176.dcm,No Pneumothorax,-0.16818833351135254
+train/No Pneumothorax/000177.dcm,No Pneumothorax,-0.16316360235214233
+train/No Pneumothorax/000178.dcm,No Pneumothorax,-0.1608293354511261
+train/No Pneumothorax/000179.dcm,No Pneumothorax,-0.1174200177192688
+train/No Pneumothorax/000180.dcm,No Pneumothorax,-0.15724217891693115
+train/Pneumothorax/000181.dcm,Pneumothorax,-0.06315004825592041
+train/No Pneumothorax/000182.dcm,No Pneumothorax,-0.18269914388656616
+train/No Pneumothorax/000183.dcm,No Pneumothorax,-0.1433737874031067
+train/No Pneumothorax/000184.dcm,No Pneumothorax,-0.19049185514450073
+train/Pneumothorax/000185.dcm,Pneumothorax,-0.06804823875427246
+train/No Pneumothorax/000186.dcm,No Pneumothorax,-0.20442822575569153
+train/Pneumothorax/000187.dcm,Pneumothorax,-0.1479809284210205
+train/No Pneumothorax/000188.dcm,No Pneumothorax,-0.1297626495361328
+train/Pneumothorax/000189.dcm,Pneumothorax,-0.14443817734718323
+train/No Pneumothorax/000190.dcm,No Pneumothorax,-0.20211485028266907
+train/Pneumothorax/000191.dcm,Pneumothorax,-0.10677635669708252
+train/No Pneumothorax/000192.dcm,No Pneumothorax,-0.15862226486206055
+train/No Pneumothorax/000193.dcm,No Pneumothorax,-0.14113175868988037
+train/No Pneumothorax/000194.dcm,No Pneumothorax,-0.22007161378860474
+train/No Pneumothorax/000195.dcm,No Pneumothorax,-0.10471892356872559
+train/No Pneumothorax/000196.dcm,No Pneumothorax,-0.20787471532821655
+train/No Pneumothorax/000197.dcm,No Pneumothorax,-0.16002091765403748
+train/No Pneumothorax/000198.dcm,No Pneumothorax,-0.17423555254936218
+train/Pneumothorax/000199.dcm,Pneumothorax,-0.0016582608222961426
+train/Pneumothorax/000200.dcm,Pneumothorax,-0.10900384187698364
+train/Pneumothorax/000201.dcm,Pneumothorax,0.029024243354797363
+train/No Pneumothorax/000202.dcm,No Pneumothorax,-0.10805076360702515
+train/No Pneumothorax/000203.dcm,No Pneumothorax,-0.1146092414855957
+train/No Pneumothorax/000204.dcm,No Pneumothorax,-0.19227838516235352
+train/No Pneumothorax/000205.dcm,No Pneumothorax,-0.19742238521575928
+train/No Pneumothorax/000206.dcm,No Pneumothorax,-0.23522859811782837
+train/No Pneumothorax/000207.dcm,No Pneumothorax,-0.17371898889541626
+train/No Pneumothorax/000208.dcm,No Pneumothorax,-0.15263259410858154
+train/No Pneumothorax/000209.dcm,No Pneumothorax,-0.15728116035461426
+train/No Pneumothorax/000210.dcm,No Pneumothorax,-0.13311928510665894
+train/No Pneumothorax/000211.dcm,No Pneumothorax,-0.1066751480102539
+train/No Pneumothorax/000212.dcm,No Pneumothorax,-0.1832524538040161
+train/Pneumothorax/000213.dcm,Pneumothorax,-0.09565180540084839
+train/Pneumothorax/000214.dcm,Pneumothorax,-0.06287646293640137
+train/No Pneumothorax/000215.dcm,No Pneumothorax,-0.07406270503997803
+train/No Pneumothorax/000216.dcm,No Pneumothorax,-0.08096122741699219
+train/Pneumothorax/000217.dcm,Pneumothorax,-0.05490368604660034
+train/No Pneumothorax/000218.dcm,No Pneumothorax,-0.05416452884674072
+train/Pneumothorax/000219.dcm,Pneumothorax,-0.01159369945526123
+train/No Pneumothorax/000220.dcm,No Pneumothorax,-0.1184004545211792
+train/No Pneumothorax/000221.dcm,No Pneumothorax,-0.2137947976589203
+train/Pneumothorax/000222.dcm,Pneumothorax,-0.10213685035705566
+train/Pneumothorax/000223.dcm,Pneumothorax,-0.12993067502975464
+train/No Pneumothorax/000224.dcm,No Pneumothorax,-0.1637454628944397
+train/No Pneumothorax/000225.dcm,No Pneumothorax,-0.1220596432685852
+train/No Pneumothorax/000226.dcm,No Pneumothorax,-0.1765921711921692
+train/Pneumothorax/000227.dcm,Pneumothorax,-0.05948609113693237
+train/No Pneumothorax/000228.dcm,No Pneumothorax,-0.16500937938690186
+train/No Pneumothorax/000229.dcm,No Pneumothorax,-0.2087046504020691
+train/No Pneumothorax/000230.dcm,No Pneumothorax,-0.10890644788742065
+train/No Pneumothorax/000231.dcm,No Pneumothorax,-0.21980196237564087
+train/Pneumothorax/000232.dcm,Pneumothorax,-0.042661070823669434
+train/Pneumothorax/000233.dcm,Pneumothorax,-0.07404029369354248
+train/No Pneumothorax/000234.dcm,No Pneumothorax,-0.19613447785377502
+train/No Pneumothorax/000235.dcm,No Pneumothorax,-0.16667985916137695
+train/Pneumothorax/000236.dcm,Pneumothorax,-0.07997268438339233
+train/No Pneumothorax/000237.dcm,No Pneumothorax,-0.17295250296592712
+train/Pneumothorax/000238.dcm,Pneumothorax,-0.02200949192047119
+train/No Pneumothorax/000239.dcm,No Pneumothorax,-0.1404871940612793
+train/Pneumothorax/000240.dcm,Pneumothorax,-0.045901477336883545
+train/No Pneumothorax/000241.dcm,No Pneumothorax,-0.13813674449920654
+train/No Pneumothorax/000242.dcm,No Pneumothorax,-0.1337980031967163
+train/No Pneumothorax/000243.dcm,No Pneumothorax,-0.09456092119216919
+train/No Pneumothorax/000244.dcm,No Pneumothorax,-0.12863361835479736
+train/No Pneumothorax/000245.dcm,No Pneumothorax,-0.23645243048667908
+train/No Pneumothorax/000246.dcm,No Pneumothorax,-0.17193585634231567
+train/Pneumothorax/000247.dcm,Pneumothorax,-0.0540735125541687
+train/No Pneumothorax/000248.dcm,No Pneumothorax,-0.18047136068344116
+train/No Pneumothorax/000249.dcm,No Pneumothorax,-0.1623515486717224

results/kaggle_roc_curve.png ADDED Viewed

Git LFS Details

SHA256: 1a23c4c61b39963d63003a4656071e57a8e05e2a064306baca181546bf72ea34
Pointer size: 130 Bytes
Size of remote file: 36.5 kB

results/roc_PNEUMOTHORAX.png ADDED Viewed

Git LFS Details

SHA256: 13c08570874493bd62d0dbad63954d974b938573f3cebf28f6d2ab8ef7d1b6fd
Pointer size: 130 Bytes
Size of remote file: 29.3 kB

src/app.py ADDED Viewed

	@@ -0,0 +1,110 @@

+import gradio as gr
+import os
+import sys
+import logging
+import numpy as np
+from PIL import Image
+# Configure logging
+# Root logger at INFO with a timestamp / level / message format.
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
+# Suppress TensorFlow logging
+# The environment variable is set before `model` is imported below
+# (presumably that is where TensorFlow gets loaded - TODO confirm).
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
+try:
+    import absl.logging
+    absl.logging.set_verbosity(absl.logging.ERROR)
+except ImportError:
+    # absl is optional here; without it there is nothing to silence.
+    pass
+logging.getLogger('tensorflow').setLevel(logging.ERROR)
+from model import RawImageModel, PrecomputedModel
+# Global Model Instances
+# All four start as None and are filled in by load_models().
+raw_model = None
+precomputed_model = None
+pos_emb = None
+neg_emb = None
+# Optimal Threshold from Kaggle validation
+# predict() labels an image PNEUMOTHORAX when its score is >= this value.
+THRESHOLD = -0.1173
+def load_models():
+    global raw_model, precomputed_model, pos_emb, neg_emb
+    if raw_model is None:
+        logger.info("Loading models...")
+        try:
+            precomputed_model = PrecomputedModel()
+            raw_model = RawImageModel()
+            # Pre-fetch text embeddings
+            pos_txt = 'small pneumothorax'
+            neg_txt = 'no pneumothorax'
+            pos_emb, neg_emb = precomputed_model.get_diagnosis_embeddings(pos_txt, neg_txt)
+            logger.info("Models loaded.")
+        except Exception as e:
+            logger.error(f"Failed to load models: {e}")
+            raise e
+def predict(image):
+    if image is None:
+        return "No image uploaded.", 0.0, "Please upload an image."
+    try:
+        # Save temp image for model ingestion
+        temp_path = "temp_gradio_upload.png"
+        image.save(temp_path)
+        # Run Inference
+        img_emb = raw_model.compute_embeddings(temp_path)
+        score = PrecomputedModel.zero_shot(img_emb, pos_emb, neg_emb)
+        score = float(score)
+        # Binary Classification
+        prediction = "PNEUMOTHORAX" if score >= THRESHOLD else "NORMAL / NO PNEUMOTHORAX"
+        # Confidence logic (simple distance from threshold)
+        # Using sigmoid to map score to probability-like 0-1 for display
+        # Note: This is an approximation
+        return prediction, score, f"Raw Score: {score:.4f} (Threshold: {THRESHOLD})"
+    except Exception as e:
+        logger.error(f"Prediction failed: {e}")
+        return "Error", 0.0, str(e)
+# Load models at startup
+# Runs at import time, before the UI is built; load_models() re-raises on failure.
+load_models()
+# UI Layout
+# Two columns: upload + button on the left, results and benchmark plots on the right.
+with gr.Blocks(title="Chest X-Ray Zero-Shot Classifier") as demo:
+    gr.Markdown("# 🩻 Zero-Shot Chest X-Ray Classification")
+    gr.Markdown("Detect **Pneumothorax** from raw X-ray images using the `google/cxr-foundation` model.")
+    with gr.Row():
+        with gr.Column():
+            gr.Markdown("### 1. Upload X-Ray")
+            # type="pil" means predict() receives a PIL image object.
+            input_image = gr.Image(type="pil", label="Upload Image (PNG/JPG/DICOM converted)")
+            predict_btn = gr.Button("Analyze Image", variant="primary")
+        with gr.Column():
+            gr.Markdown("### 2. Results")
+            # These three components receive predict()'s return tuple, in order.
+            output_label = gr.Label(label="Prediction")
+            output_score = gr.Number(label="Zero-Shot Score")
+            output_msg = gr.Textbox(label="Details")
+            gr.Markdown("---")
+            gr.Markdown("### Performance Context")
+            gr.Markdown("This model uses a **zero-shot** approach. The threshold was calibrated using a local Kaggle dataset.")
+            with gr.Tabs():
+                with gr.TabItem("Local Kaggle Benchmark"):
+                    # NOTE(review): relative path - presumably resolved against the
+                    # process working directory (repo root); confirm for local runs.
+                    gr.Image("results/kaggle_roc_curve.png", label="local ROC Curve")
+                    gr.Markdown("**AUC: 0.88** on 250 local samples.")
+                with gr.TabItem("Google Benchmark"):
+                    gr.Image("results/roc_PNEUMOTHORAX.png", label="Reference ROC")
+    # Wire the button: one image in, (label, score, details) out.
+    predict_btn.click(predict, inputs=input_image, outputs=[output_label, output_score, output_msg])
+if __name__ == "__main__":
+    # NOTE(review): share=True requests a public Gradio share link; confirm it is
+    # still wanted when the app is hosted on Spaces.
+    demo.launch(share=True)

src/calculate_threshold.py ADDED Viewed

	@@ -0,0 +1,54 @@

+import pandas as pd
+import numpy as np
+from sklearn.metrics import roc_curve
+import logging
+import os
+# Configure logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
+def calculate_optimal_threshold(results_path):
+    """
+    Calculates optimal threshold using Youden's J statistic.
+    """
+    if not os.path.exists(results_path):
+        logger.error(f"Results file not found: {results_path}")
+        return None
+    try:
+        df = pd.read_csv(results_path)
+        logger.info(f"Loaded {len(df)} predictions from {results_path}")
+        df = df.dropna(subset=['pneumothorax_score'])
+        if len(df) == 0:
+            logger.error("No valid predictions found.")
+            return None
+        # Binary Labels
+        y_true = (df['true_label'] == 'Pneumothorax').astype(int)
+        y_scores = df['pneumothorax_score']
+        fpr, tpr, thresholds = roc_curve(y_true, y_scores)
+        # Youden's J = Sensitivity + Specificity - 1
+        # Sensitivity = TPR
+        # Specificity = 1 - FPR
+        # J = TPR + (1 - FPR) - 1 = TPR - FPR
+        j_scores = tpr - fpr
+        best_idx = np.argmax(j_scores)
+        best_threshold = thresholds[best_idx]
+        logger.info(f"Optimal Threshold (Youden's J): {best_threshold:.4f}")
+        logger.info(f"Sensitivity: {tpr[best_idx]:.4f}")
+        logger.info(f"Specificity: {1 - fpr[best_idx]:.4f}")
+        return best_threshold
+    except Exception as e:
+        logger.error(f"Failed to calculate threshold: {e}")
+        return None
+if __name__ == "__main__":
+    results_file = "results/kaggle_predictions.csv"
+    calculate_optimal_threshold(results_file)

src/create_dummy_image.py ADDED Viewed

	@@ -0,0 +1,13 @@

+import png
+import numpy as np
+# Generate a 1024x1024 random noise image to simulate an X-ray
+width = 512
+height = 512
+img = np.random.randint(0, 65535, (height, width)).astype(np.uint16)
+with open('data/test_xray.png', 'wb') as f:
+    writer = png.Writer(width=width, height=height, greyscale=True, bitdepth=16)
+    writer.write(f, img.tolist())
+print("Created data/test_xray.png")

src/dicom_utils.py ADDED Viewed

	@@ -0,0 +1,49 @@

+import pydicom
+import numpy as np
+import logging
+from PIL import Image
+logger = logging.getLogger(__name__)
+def read_dicom_image(file_path):
+    """
+    Reads a DICOM file and returns it as a NumPy array (grayscale).
+    Handles pixel value scaling and content storage mechanism.
+    """
+    try:
+        ds = pydicom.dcmread(file_path)
+        # Handle pixel data
+        if 'PixelData' not in ds:
+            raise ValueError(f"No pixel data found in {file_path}")
+        image_data = ds.pixel_array.astype(float)
+        # Handle RescaleSlope and RescaleIntercept if present (Map to Hounsfield Units or physical values)
+        slope = getattr(ds, 'RescaleSlope', 1.0)
+        intercept = getattr(ds, 'RescaleIntercept', 0.0)
+        image_data = image_data * slope + intercept
+        # Normalize to 0-255 range for consistency with standard image processing
+        # Note: This discards absolute physical values but preserves structure for the model
+        image_min = np.min(image_data)
+        image_max = np.max(image_data)
+        if image_max != image_min:
+            image_data = (image_data - image_min) / (image_max - image_min) * 255.0
+        else:
+            image_data = np.zeros_like(image_data)
+        image_data = image_data.astype(np.uint8)
+        # Handle photometric interpretation (invert if needed)
+        # MONOCHROME1 typically means 0 is white, 255 is black (inverse of standard X-ray)
+        # We generally want air (black) to be low, bone (white) to be high
+        if ds.PhotometricInterpretation == "MONOCHROME1":
+             image_data = 255 - image_data
+        return image_data
+    except Exception as e:
+        logger.error(f"Error reading DICOM {file_path}: {e}")
+        raise

src/download.py ADDED Viewed

	@@ -0,0 +1,43 @@

+import os
+import wget
+import logging
+# Configure logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
+# Constants
+# Directory the downloaded files are written to (relative path)
+DATA_DIR = "data"
+# Maps the local file name (saved under DATA_DIR) to the URL it is downloaded from
+URLS = {
+    "precomputed_image_embeddings.npz": "https://storage.googleapis.com/healthai-us/encoded-data/nih/radiology/cxr/precomputed_image_embeddings.npz",
+    "precomputed_text_embeddings.npz": "https://storage.googleapis.com/healthai-us/encoded-data/nih/radiology/cxr/precomputed_text_embeddings.npz",
+    "cxr14_subset_labels.csv": "https://storage.googleapis.com/healthai-us/encoded-data/nih/radiology/cxr/cxr14_subset_labels.csv",
+    "sample.png": "https://upload.wikimedia.org/wikipedia/commons/c/c8/Chest_Xray_PA_3-8-2010.png"
+}
+def download_file(url, filename, output_dir):
+    """Downloads a file if it doesn't already exist."""
+    filepath = os.path.join(output_dir, filename)
+    if os.path.exists(filepath):
+        logger.info(f"File already exists: {filepath}")
+        return
+    logger.info(f"Downloading {filename} from {url}...")
+    try:
+        wget.download(url, out=filepath)
+        print() # Newline after wget bar
+        logger.info(f"Downloaded {filename}")
+    except Exception as e:
+        logger.error(f"Failed to download {filename}: {e}")
+def main():
+    """Main function to download all required files."""
+    if not os.path.exists(DATA_DIR):
+        os.makedirs(DATA_DIR)
+        logger.info(f"Created directory: {DATA_DIR}")
+    for filename, url in URLS.items():
+        download_file(url, filename, DATA_DIR)
+if __name__ == "__main__":
+    main()

src/evaluate.py ADDED Viewed

	@@ -0,0 +1,43 @@

+import os
+import logging
+import numpy as np
+import matplotlib.pyplot as plt
+from sklearn.metrics import roc_curve, auc
+logger = logging.getLogger(__name__)
+def evaluate_predictions(scores, true_labels, diagnosis, output_dir="results"):
+    """Calculates AUC and generates ROC plot."""
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+    fpr, tpr, thresholds = roc_curve(true_labels, scores)
+    roc_auc = auc(fpr, tpr)
+    logger.info(f"Diagnosis: {diagnosis}")
+    logger.info(f"AUC: {roc_auc:.4f}")
+    # Plot ROC curve
+    plt.figure()
+    lw = 2
+    plt.plot(
+        fpr,
+        tpr,
+        color="darkorange",
+        lw=lw,
+        label="ROC curve (area = %0.2f)" % roc_auc,
+    )
+    plt.plot([0, 1], [0, 1], color="navy", lw=lw, linestyle="--")
+    plt.xlim([0.0, 1.0])
+    plt.ylim([0.0, 1.05])
+    plt.xlabel("False Positive Rate")
+    plt.ylabel("True Positive Rate")
+    plt.title(f"ROC for {diagnosis}")
+    plt.legend(loc="lower right")
+    plot_path = os.path.join(output_dir, f"roc_{diagnosis}.png")
+    plt.savefig(plot_path)
+    logger.info(f"ROC plot saved to {plot_path}")
+    return roc_auc

src/evaluate_kaggle.py ADDED Viewed

	@@ -0,0 +1,142 @@

+import os
+import sys
+import pandas as pd
+import logging
+import argparse
+import numpy as np
+from tqdm import tqdm
+# Configure logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
+# Suppress TensorFlow logging
+# The environment variable is set here, before the model module is imported below
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
+try:
+    import absl.logging
+    absl.logging.set_verbosity(absl.logging.ERROR)
+except ImportError:
+    # absl is treated as optional: without it there is nothing to silence
+    pass
+# NOTE(review): logging is already imported at the top of this file; this second import is redundant
+import logging
+logging.getLogger('tensorflow').setLevel(logging.ERROR)
+from model import RawImageModel, PrecomputedModel
+from dicom_utils import read_dicom_image
+from PIL import Image
+def main():
+    parser = argparse.ArgumentParser(description="Evaluate on Kaggle DICOM Dataset")
+    parser.add_argument("--csv", default="data/kaggle/labels.csv", help="Path to labels CSV")
+    parser.add_argument("--data-dir", default="data/kaggle", help="Root directory for images if relative paths in CSV")
+    parser.add_argument("--output", default="results/kaggle_predictions.csv", help="Output predictions file")
+    args = parser.parse_args()
+    # Create output directory
+    os.makedirs(os.path.dirname(args.output), exist_ok=True)
+    # Load dataset
+    try:
+        df = pd.read_csv(args.csv)
+        logger.info(f"Loaded {len(df)} records from {args.csv}")
+    except Exception as e:
+        logger.error(f"Failed to load CSV: {e}")
+        return
+    # Check for file column
+    file_col = 'file' if 'file' in df.columns else 'dicom_file' # Adapt to potential column names
+    if file_col not in df.columns and 'file' not in df.columns:
+         # Fallback inspection or error
+         logger.error(f"Missing file column in CSV. Found: {df.columns}")
+         return
+    # Initialize Models
+    try:
+        # We need PrecomputedModel for text embeddings (labels)
+        precomputed_model = PrecomputedModel()
+        # We need RawImageModel for the images
+        raw_model = RawImageModel()
+        logger.info("Models loaded successfully.")
+    except Exception as e:
+        logger.fatal(f"Failed to initialize models: {e}")
+        return
+    # Get text embeddings for diagnosis
+    diagnosis = 'PNEUMOTHORAX'
+    try:
+        # Hardcoded prompts matching main.py
+        pos_txt = 'small pneumothorax'
+        neg_txt = 'no pneumothorax'
+        pos_emb, neg_emb = precomputed_model.get_diagnosis_embeddings(pos_txt, neg_txt)
+    except Exception as e:
+        logger.fatal(f"Failed to get text embeddings: {e}")
+        return
+    predictions = []
+    # Iterate and predict
+    print(f"Running inference for {diagnosis} on {len(df)} images...")
+    temp_path = "temp_inference.png"
+    for _, row in tqdm(df.iterrows(), total=len(df)):
+        file_path = row[file_col]
+        # Construct full path
+        full_path = os.path.join(args.data_dir, file_path) if not os.path.isabs(file_path) else file_path
+        # Check if file exists
+        if not os.path.exists(full_path):
+             logger.warning(f"File not found: {full_path}")
+             predictions.append({
+                'file': file_path,
+                'true_label': None,
+                'pneumothorax_score': None,
+                'error': 'File not found'
+            })
+             continue
+        true_label = row.get('label', row.get('PNEUMOTHORAX', 'Unknown'))
+        try:
+            # 1. Read DICOM
+            image_array = read_dicom_image(full_path)
+            # 2. Save as temp PNG (Required by RawImageModel/TF pipeline currently)
+            Image.fromarray(image_array).save(temp_path)
+            # 3. Compute Image Embedding
+            img_emb = raw_model.compute_embeddings(temp_path)
+            # 4. Compute Zero-Shot Score
+            score = PrecomputedModel.zero_shot(img_emb, pos_emb, neg_emb)
+            predictions.append({
+                'file': file_path,
+                'true_label': true_label,
+                'pneumothorax_score': float(score)
+            })
+        except Exception as e:
+            # logger.warning(f"Failed to process {file_path}: {e}")
+            predictions.append({
+                'file': file_path,
+                'true_label': true_label,
+                'pneumothorax_score': None,
+                'error': str(e)
+            })
+        # Incremental Save every 10 items
+        if len(predictions) % 10 == 0:
+            pd.DataFrame(predictions).to_csv(args.output, index=False)
+    # Final Save
+    results_df = pd.DataFrame(predictions)
+    results_df.to_csv(args.output, index=False)
+    logger.info(f"Predictions saved to {args.output}")
+    # Cleanup
+    if os.path.exists("temp_inference.png"):
+        os.remove("temp_inference.png")
+if __name__ == "__main__":
+    main()

src/main.py ADDED Viewed

	@@ -0,0 +1,106 @@

+import os
+import argparse
+import logging
+import sys
+# Suppress TensorFlow and system warnings
+# These environment variables are set before the model module is imported below
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # FATAL
+os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
+import warnings
+# NOTE(review): this silences every Python warning for the whole process, not only TensorFlow's
+warnings.filterwarnings('ignore')
+import numpy as np
+import pandas as pd
+# Configure logging first
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
+# Suppress absl logging from TensorFlow
+try:
+    import absl.logging
+    absl.logging.set_verbosity(absl.logging.ERROR)
+except ImportError:
+    # absl is treated as optional: without it there is nothing to silence
+    pass
+# Suppress TensorFlow Python logging
+logging.getLogger('tensorflow').setLevel(logging.ERROR)
+from model import PrecomputedModel, RawImageModel
+from evaluate import evaluate_predictions
+# Maps each supported --diagnosis choice to its (positive query, negative query) text pair.
+# The query strings are looked up by exact match in the precomputed text embeddings.
+DIAGNOSIS_PROMPTS = {
+    'AIRSPACE_OPACITY': ('Airspace Opacity', 'no evidence of airspace disease'),
+    'PNEUMOTHORAX': ('small pneumothorax', 'no pneumothorax'),
+    'EFFUSION': ('large pleural effusion', 'no pleural effusion'),
+    'PULMONARY_EDEMA': ('moderate pulmonary edema', 'no pulmonary edema'),
+}
+def main():
+    parser = argparse.ArgumentParser(description="Zero-Shot Chest X-Ray Classification")
+    parser.add_argument("--diagnosis", type=str, choices=DIAGNOSIS_PROMPTS.keys(), required=True, help="Diagnosis to evaluate")
+    parser.add_argument("--data-dir", type=str, default="data", help="Path to data directory")
+    parser.add_argument("--raw-image", type=str, help="Path to a raw image file for inference (optional)")
+    args = parser.parse_args()
+    # Get prompts
+    pos_txt, neg_txt = DIAGNOSIS_PROMPTS[args.diagnosis]
+    logger.info(f"Diagnosis: {args.diagnosis}")
+    logger.info(f"Positive query: '{pos_txt}'")
+    logger.info(f"Negative query: '{neg_txt}'")
+    # Load precomputed model for text embeddings (and image embeddings if no raw image)
+    precomputed_model = PrecomputedModel(data_dir=args.data_dir)
+    pos_emb, neg_emb = precomputed_model.get_diagnosis_embeddings(pos_txt, neg_txt)
+    if args.raw_image:
+        # Raw Image Inference Mode
+        logger.info(f"Running inference on raw image: {args.raw_image}")
+        raw_model = RawImageModel()
+        try:
+            image_emb = raw_model.compute_embeddings(args.raw_image)
+            # image_emb shape is likely (1, 32, 128) or (32, 128)
+            # PrecomputedModel.zero_shot expects flattened or (32, 128)
+            score = PrecomputedModel.zero_shot(image_emb, pos_emb, neg_emb)
+            logger.info(f"Zero-shot score for {args.raw_image}: {score:.4f}")
+            # Since we only have one image, we can't calculate AUC meaningfully
+            # unless we run it against the full validation set which takes time.
+            # For this demo, just output the score.
+            print(f"Score for {args.diagnosis}: {score}")
+        except Exception as e:
+            logger.error(f"Failed to process raw image: {e}")
+            sys.exit(1)
+    else:
+        # Precomputed Embeddings Evaluation Mode (Full Dataset)
+        logger.info("Running evaluation on full precomputed dataset...")
+        # Filter labels for the target diagnosis (0 or 1)
+        labels_df = precomputed_model.labels
+        target_df = labels_df[labels_df[args.diagnosis].isin([0, 1])][['image_id', args.diagnosis]].copy()
+        image_ids = target_df['image_id'].tolist()
+        true_labels = target_df[args.diagnosis].tolist()
+        # Compute scores
+        valid_ids, scores = precomputed_model.compute_scores(image_ids, pos_emb, neg_emb)
+        # Filter labels to match valid_ids found in embeddings
+        final_labels = []
+        for img_id, label in zip(image_ids, true_labels):
+            if img_id in valid_ids:
+                final_labels.append(label)
+        if not scores:
+            logger.error("No valid scores computed. Check embedding match.")
+            sys.exit(1)
+        # Evaluate
+        evaluate_predictions(scores, final_labels, args.diagnosis)
+if __name__ == "__main__":
+    main()

src/model.py ADDED Viewed

	@@ -0,0 +1,171 @@

+import os
+import logging
+import numpy as np
+import pandas as pd
+from huggingface_hub import snapshot_download
+# Configure logging
+logger = logging.getLogger(__name__)
+class PrecomputedModel:
+    def __init__(self, data_dir="data"):
+        self.data_dir = data_dir
+        self.image_embeddings = None
+        self.text_embeddings = None
+        self.labels = None
+        self._load_data()
+    def _load_data(self):
+        """Loads precomputed embeddings and labels."""
+        img_emb_path = os.path.join(self.data_dir, "precomputed_image_embeddings.npz")
+        txt_emb_path = os.path.join(self.data_dir, "precomputed_text_embeddings.npz")
+        labels_path = os.path.join(self.data_dir, "cxr14_subset_labels.csv")
+        # Text embeddings are strictly required for Zero-Shot
+        if not os.path.exists(txt_emb_path):
+             raise FileNotFoundError(f"Missing required text embeddings: {txt_emb_path}")
+        logger.info("Loading precomputed text embeddings...")
+        with np.load(txt_emb_path) as data:
+            self.text_embeddings = {key: data[key] for key in data}
+        # Image embeddings (Optional, only for benchmarking)
+        if os.path.exists(img_emb_path):
+            logger.info("Loading precomputed image embeddings...")
+            with np.load(img_emb_path) as data:
+                self.image_embeddings = {key: data[key] for key in data}
+        else:
+            logger.warning("Precomputed image embeddings not found. Benchmarking features will be disabled.")
+        # Labels (Optional)
+        if os.path.exists(labels_path):
+            logger.info("Loading labels...")
+            self.labels = pd.read_csv(labels_path)
+        else:
+             logger.warning("Labels file not found.")
+    def get_diagnosis_embeddings(self, pos_txt, neg_txt):
+        """Retrieves embeddings for positive and negative text queries."""
+        if pos_txt not in self.text_embeddings:
+            raise ValueError(f"Positive query '{pos_txt}' not found in precomputed embeddings.")
+        if neg_txt not in self.text_embeddings:
+            raise ValueError(f"Negative query '{neg_txt}' not found in precomputed embeddings.")
+        return self.text_embeddings[pos_txt], self.text_embeddings[neg_txt]
+    def compute_scores(self, image_ids, pos_emb, neg_emb):
+        """Computes zero-shot scores for a list of image IDs."""
+        scores = []
+        valid_ids = []
+        for img_id in image_ids:
+            if img_id not in self.image_embeddings:
+                continue
+            img_emb = self.image_embeddings[img_id]
+            score = self.zero_shot(img_emb, pos_emb, neg_emb)
+            scores.append(score)
+            valid_ids.append(img_id)
+        return valid_ids, scores
+    @staticmethod
+    def compute_image_text_similarity(image_emb, txt_emb):
+        """Computes cosine similarity between image and text embeddings."""
+        # Image embedding shape: (1, 32, 128) or (32, 128) flattened?
+        # The notebook says: image_emb = np.reshape(image_emb, (32, 128))
+        image_emb = np.reshape(image_emb, (32, 128))
+        similarities = []
+        for i in range(32):
+            # cosine similarity
+            similarity = np.dot(image_emb[i], txt_emb) / (np.linalg.norm(image_emb[i]) * np.linalg.norm(txt_emb))
+            similarities.append(similarity)
+        return np.max(similarities)
+    @classmethod
+    def zero_shot(cls, image_emb, pos_txt_emb, neg_txt_emb):
+        """Computes the zero-shot score (pos_sim - neg_sim)."""
+        pos_cosine = cls.compute_image_text_similarity(image_emb, pos_txt_emb)
+        neg_cosine = cls.compute_image_text_similarity(image_emb, neg_txt_emb)
+        return pos_cosine - neg_cosine
+class RawImageModel:
+    def __init__(self):
+        self.elixrc_model = None
+        self.qformer_model = None
+        self._load_model()
+    def _load_model(self):
+        """Loads the TensorFlow model from Hugging Face."""
+        try:
+            import tensorflow as tf
+            import tensorflow_text as text  # Registers the ops
+        except ImportError:
+            raise ImportError("TensorFlow or tensorflow-text is not installed. Use precomputed mode or install them.")
+        logger.info("Checking for GPU acceleration...")
+        gpus = tf.config.list_physical_devices('GPU')
+        if gpus:
+            logger.info(f"Running on GPU: {gpus}")
+        else:
+            logger.info("Running on CPU. Expect slower inference.")
+        logger.info("Downloading model weights from Hugging Face...")
+        model_path = snapshot_download(
+            repo_id="google/cxr-foundation",
+            allow_patterns=['elixr-c-v2-pooled/*', 'pax-elixr-b-text/*']
+        )
+        logger.info("Loading ELIXR-C (Image Encoder)...")
+        self.elixrc_model = tf.saved_model.load(os.path.join(model_path, 'elixr-c-v2-pooled'))
+        logger.info("Loading QFormer (Adapter)...")
+        self.qformer_model = tf.saved_model.load(os.path.join(model_path, 'pax-elixr-b-text'))
+    def compute_embeddings(self, image_path):
+        """Generates embeddings for a raw image file."""
+        import tensorflow as tf
+        import png # pypng
+        # Load and preprocess image
+        # This follows the notebook's png_to_tfexample logic but simplified or imported
+        # For simplicity, implementing the preprocess logic here
+        try:
+            # Read image using pypng logic or similar
+            # Note: The notebook uses pypng to write to BytesIO then TF reads it.
+            # We can just read the file directly if it's a PNG.
+            with open(image_path, 'rb') as f:
+                image_bytes = f.read()
+            # Create TF Example
+            example = tf.train.Example()
+            features = example.features.feature
+            features['image/encoded'].bytes_list.value.append(image_bytes)
+            features['image/format'].bytes_list.value.append(b'png')
+            serialized_example = example.SerializeToString()
+            # Step 1: ELIXR-C
+            elixrc_infer = self.elixrc_model.signatures['serving_default']
+            elixrc_output = elixrc_infer(input_example=tf.constant([serialized_example]))
+            elixrc_embedding = elixrc_output['feature_maps_0'].numpy() # Shape (1, 8, 8, 1376)
+            # Step 2: QFormer
+            # Initialize text inputs with zeros (as we only want image embeddings)
+            qformer_input = {
+                'image_feature': elixrc_embedding.tolist(),
+                'ids': np.zeros((1, 1, 128), dtype=np.int32).tolist(),
+                'paddings': np.zeros((1, 1, 128), dtype=np.float32).tolist(),
+            }
+            qformer_output = self.qformer_model.signatures['serving_default'](**qformer_input)
+            elixrb_embeddings = qformer_output['all_contrastive_img_emb'].numpy() # Shape (1, 32, 128)
+            return elixrb_embeddings
+        except Exception as e:
+            logger.error(f"Error computing raw embeddings: {e}")
+            raise

src/plot_kaggle_roc.py ADDED Viewed

	@@ -0,0 +1,62 @@

+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+from sklearn.metrics import roc_curve, auc
+import logging
+import os
+# Configure logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
+def plot_roc_curve(results_path, output_image_path):
+    """
+    Reads predictions CSV, calculates AUC, and plots ROC curve.
+    """
+    if not os.path.exists(results_path):
+        logger.error(f"Results file not found: {results_path}")
+        return
+    try:
+        df = pd.read_csv(results_path)
+        logger.info(f"Loaded {len(df)} predictions from {results_path}")
+        # Filter out errors
+        df = df.dropna(subset=['pneumothorax_score'])
+        if len(df) == 0:
+            logger.error("No valid predictions found.")
+            return
+        # Prepare True Labels (Binary)
+        # Kaggle Labels: 'Pneumothorax' vs 'No Pneumothorax'
+        y_true = (df['true_label'] == 'Pneumothorax').astype(int)
+        y_scores = df['pneumothorax_score']
+        # Calculate ROC and AUC
+        fpr, tpr, thresholds = roc_curve(y_true, y_scores)
+        roc_auc = auc(fpr, tpr)
+        logger.info(f"Calculated AUC: {roc_auc:.4f}")
+        # Plot
+        plt.figure(figsize=(8, 6))
+        plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')
+        plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
+        plt.xlim([0.0, 1.0])
+        plt.ylim([0.0, 1.05])
+        plt.xlabel('False Positive Rate')
+        plt.ylabel('True Positive Rate')
+        plt.title('ROC Curve - Zero-Shot Pneumothorax Classification (Kaggle)')
+        plt.legend(loc="lower right")
+        plt.grid(True, alpha=0.3)
+        plt.savefig(output_image_path)
+        logger.info(f"ROC curve saved to {output_image_path}")
+        plt.close()
+    except Exception as e:
+        logger.error(f"Failed to plot ROC curve: {e}")
+if __name__ == "__main__":
+    results_file = "results/kaggle_predictions.csv"
+    output_image = "results/kaggle_roc_curve.png"
+    plot_roc_curve(results_file, output_image)