SreekarB committed on
Commit e88139d · verified · 1 Parent(s): 763369a

Upload 12 files

app.py CHANGED
@@ -2395,46 +2395,54 @@ def create_interface():
  vae.load(vae_path)
  app_state['vae'] = vae

- # We also need latent representations for RF training
- # Use synthetic data if no real data is available
- from data_preprocessing import generate_synthetic_fc_matrices
- synthetic_fc, synthetic_demo = generate_synthetic_fc_matrices(30)
- logger.info("Generating latent representations from synthetic data...")
- latents = vae.encode(synthetic_fc, synthetic_demo)
- app_state['latents'] = latents
- app_state['demographics'] = synthetic_demo
+ # Only use real data for training and visualization
+ logger.info("Using loaded VAE model with real data only...")
+
+ # Set flag to indicate VAE model is loaded, but not using synthetic data
  app_state['vae_trained'] = True
- logger.info("Loaded VAE model and generated synthetic latents")
- else:
- # Train a simple VAE with synthetic data
- from vae_model import DemoVAE
- from data_preprocessing import generate_synthetic_fc_matrices
-
- logger.info("VAE model not found. Training a simple model with synthetic data...")
-
- # Generate synthetic data
- synthetic_fc, synthetic_demo = generate_synthetic_fc_matrices(30)
-
- # Train a simple VAE
- vae = DemoVAE(latent_dim=10)
- vae.train(synthetic_fc, synthetic_demo, nepochs=10, bsize=8)
-
- # Get latent representations
- latents = vae.encode(synthetic_fc, synthetic_demo)
-
- # Save in app_state
- app_state['vae'] = vae
- app_state['latents'] = latents
- app_state['demographics'] = synthetic_demo
- app_state['vae_trained'] = True
-
- # Save the model for future use
- if not os.path.exists('models'):
- os.makedirs('models')
- vae.save('models/vae_model.pt')
-
- logger.info("Trained and saved a simple VAE model with synthetic data")
+
+ # Try to load previously saved latents if they exist
+ if os.path.exists('results/latents.npy'):
+ try:
+ logger.info("Loading saved latent representations...")
+ latents = np.load('results/latents.npy')
+ app_state['latents'] = latents
+ logger.info(f"Loaded {len(latents)} real latent vectors")
+
+ # Try to load real demographics if available
+ if os.path.exists('temp_demographics.csv'):
+ logger.info("Loading demographics from temp_demographics.csv")
+ demo_df = pd.read_csv('temp_demographics.csv')
+ app_state['demographics'] = {
+ 'age_at_stroke': demo_df['age'].values,
+ 'sex': demo_df['sex'].values,
+ 'months_post_stroke': demo_df['months_post_stroke'].values,
+ 'wab_score': demo_df['wab_score'].values
+ }
+ else:
+ logger.warning("No real demographic data found")
+ except Exception as e:
+ logger.error(f"Error loading real latents: {e}")
+ logger.warning("Will not use synthetic data")
+ else:
+ logger.warning("No real latent representations found")
+ logger.warning("Will not use synthetic data")
+ else:
+ # Don't train with synthetic data in strict real data mode
+ logger.info("VAE model not found and using strict real data mode.")
+ logger.warning("Cannot train VAE model without real data")
+
+ # Set state to indicate VAE is not trained
+ app_state['vae_trained'] = False
+
+ # Show message about requiring real data
+ status_msg = "No VAE model available. Please train with real data first."
+ return {
+ tab_rf: gr.update(visible=False),
+ tab_vae: gr.update(visible=True),
+ status: status_msg,
+ vae_status: "Model not trained. Upload real data and train with it."
+ }
  except Exception as e:
  error_fig = plt.figure(figsize=(10, 6))
  message = f"Error: Unable to load or train VAE model: {str(e)}"
@@ -2635,55 +2643,15 @@ def create_interface():
  app_state['rf_trained'] = True
  rf_loaded = True

- # If we couldn't load both models, train quick synthetic models
+ # If we couldn't load both models in strict real data mode
  if not (vae_loaded and rf_loaded):
- logger.info("Training synthetic models for demo purposes...")
-
- # Generate synthetic data
- from data_preprocessing import generate_synthetic_fc_matrices
- synthetic_fc, synthetic_demo = generate_synthetic_fc_matrices(30)
-
- # Train VAE if needed
- if not vae_loaded:
- vae = DemoVAE(latent_dim=10)
- vae.train(synthetic_fc, synthetic_demo, nepochs=10, bsize=8)
- app_state['vae'] = vae
- app_state['vae_trained'] = True
-
- # Save for future use
- if not os.path.exists('models'):
- os.makedirs('models')
- vae.save('models/vae_model.pt')
- else:
- vae = app_state['vae']
-
- # Get latent representations for RF training
- latents = vae.encode(synthetic_fc, synthetic_demo)
-
- # Train RF if needed
- if not rf_loaded:
- from main import RandomForestPredictor
-
- # Create synthetic outcome data
- import numpy as np
- outcomes = np.random.normal(50, 10, size=len(synthetic_demo))
-
- # Train the RF model
- predictor = RandomForestPredictor()
- predictor.train(latents, outcomes)
-
- app_state['predictor'] = predictor
- app_state['rf_trained'] = True
-
- # Save for future use
- if not os.path.exists('models'):
- os.makedirs('models')
- torch.save({
- 'predictor_state': predictor.model,
- 'feature_importance': predictor.feature_importance
- }, 'models/predictor_model.pt')
-
- logger.info("Successfully trained synthetic models for demo")
+ logger.info("Strict real data mode: Not using synthetic data")
+
+ # Show a message to the user
+ return {
+ status: "Cannot use synthetic data in strict real data mode. Please train with real data first.",
+ rf_status: "Not trained. Upload real data and train the VAE model first."
+ }
  except Exception as e:
  error_message = f"Error: Unable to load or train required models: {str(e)}"
  error_fig = plt.figure(figsize=(10, 6))
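
Note: the new loading path above only works if a real-data run has already written latents to results/latents.npy and demographics to temp_demographics.csv. The commit does not show that save step; the helper below is a hypothetical sketch of what it could look like, assuming a fitted DemoVAE and a demographics DataFrame with the age, sex, months_post_stroke and wab_score columns that app.py reads back.

import os
import numpy as np

def save_real_latents(vae, fc_features, demo_df,
                      latents_path='results/latents.npy',
                      demo_path='temp_demographics.csv'):
    """Persist real-data latents and demographics to the paths app.py loads."""
    # Hypothetical helper, not part of this commit.
    os.makedirs(os.path.dirname(latents_path), exist_ok=True)
    latents = vae.get_latents(fc_features)  # encode() is the new alias for the same call
    np.save(latents_path, latents)
    demo_df.to_csv(demo_path, index=False)
    return latents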
cache/.DS_Store ADDED
Binary file (6.15 kB).
 
cache/atlas/power_2011_coords.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2f0e80988258ff3da5522d409679d81d633a3bd1c39c4a23494926101eb852eb
+ size 6464
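
Note: the new cache/atlas/power_2011_coords.npy file is an LFS-tracked NumPy array of coordinates for the Power 2011 atlas. A minimal reading sketch; the (264, 3) shape is an assumption based on the 264-ROI Power atlas and the 6464-byte file size, not something stated in the commit:

import numpy as np

coords = np.load('cache/atlas/power_2011_coords.npy')
print(coords.shape)  # assumed to be (264, 3): one (x, y, z) coordinate per ROI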
config.py CHANGED
@@ -30,7 +30,8 @@ PREDICTION_CONFIG = {
  'default_outcome': 'wab_aq',
  'save_path': 'results/treatment_predictor.joblib',
  'skip_behavioral_data': True, # Set to True to skip processing behavioral_data.csv
- 'use_synthetic_nifti': False, # Set to False to use only real NIfTI data
- 'use_synthetic_fc': False, # Set to False to use only real FC matrices
- 'strict_real_data': True # Set to True to strictly use real data only
+ 'use_synthetic_nifti': False, # Set to False to use only real NIfTI data
+ 'use_synthetic_fc': False, # Set to False to use only real FC matrices
+ 'strict_real_data': True, # Set to True to strictly use real data only
+ 'no_mock_data': True # Set to True to prevent using any mock or synthetic data
  }
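
Note: downstream code can consult the new flags with PREDICTION_CONFIG.get, which is what the main.py change below does; a minimal sketch, assuming PREDICTION_CONFIG is imported from config:

from config import PREDICTION_CONFIG

strict_real = PREDICTION_CONFIG.get('strict_real_data', False)
no_mock = PREDICTION_CONFIG.get('no_mock_data', False)

if strict_real or no_mock:
    # Any synthetic/mock fallback should be skipped in this mode.
    print("Strict real data mode: synthetic fallbacks disabled")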
main.py CHANGED
@@ -204,38 +204,46 @@ def run_analysis(data_dir="data",
  print("Creating learning curve visualization...")

  # Check if losses are stored in the VAE object first (most reliable source)
- if hasattr(vae, 'train_losses') and hasattr(vae, 'val_losses'):
- if len(vae.train_losses) > 0 and len(vae.val_losses) > 0:
- print(f"Using learning curves from VAE object: {len(vae.train_losses)} train, {len(vae.val_losses)} validation points")
- learning_fig = plot_learning_curves(vae.train_losses, vae.val_losses)
- else:
- # Fall back to the losses passed directly
- if train_losses and val_losses:
- print(f"Using passed learning curves: {len(train_losses)} train, {len(val_losses)} validation points")
- learning_fig = plot_learning_curves(train_losses, val_losses)
- else:
- # Create a placeholder
- print("No training history available for learning curves")
- learning_fig = plt.figure(figsize=(10, 6))
- plt.text(0.5, 0.5, "Learning curve data unavailable",
- ha='center', va='center', transform=plt.gca().transAxes,
- fontsize=14, color='darkred')
- plt.axis('off')
- plt.tight_layout()
+ train_data = []
+ val_data = []
+
+ # Only use real data from VAE object or training results
+ if hasattr(vae, 'train_losses') and len(getattr(vae, 'train_losses', [])) > 0:
+ train_data = vae.train_losses
+ print(f"Found {len(train_data)} real training loss points in VAE object")
+ elif train_losses and len(train_losses) > 0:
+ train_data = train_losses
+ print(f"Using {len(train_data)} real training loss points from fit return value")
  else:
- # Fall back to the losses passed directly
- if train_losses and val_losses:
- print(f"Using passed learning curves: {len(train_losses)} train, {len(val_losses)} validation points")
- learning_fig = plot_learning_curves(train_losses, val_losses)
- else:
- # Create a placeholder
- print("No training history available for learning curves")
- learning_fig = plt.figure(figsize=(10, 6))
- plt.text(0.5, 0.5, "Learning curve data unavailable",
- ha='center', va='center', transform=plt.gca().transAxes,
- fontsize=14, color='darkred')
- plt.axis('off')
- plt.tight_layout()
+ # Instead of synthetic data, provide empty list and warning
+ print("WARNING: No real training loss data found")
+ train_data = []
+
+ # Do the same for validation data
+ if hasattr(vae, 'val_losses') and len(getattr(vae, 'val_losses', [])) > 0:
+ val_data = vae.val_losses
+ print(f"Found {len(val_data)} real validation loss points in VAE object")
+ elif val_losses and len(val_losses) > 0:
+ val_data = val_losses
+ print(f"Using {len(val_data)} real validation loss points from fit return value")
+ else:
+ # Instead of synthetic data, provide empty list and warning
+ print("WARNING: No real validation loss data found")
+ val_data = []
+
+ # If we get here, we have some training data (real or synthetic)
+ # Store the data in the VAE object for future use
+ if not hasattr(vae, 'train_losses') or len(getattr(vae, 'train_losses', [])) == 0:
+ print("Storing training loss data in VAE object")
+ vae.train_losses = train_data
+
+ if not hasattr(vae, 'val_losses') or len(getattr(vae, 'val_losses', [])) == 0:
+ print("Storing validation loss data in VAE object")
+ vae.val_losses = val_data
+
+ # Now create the visualization using the data we collected
+ print(f"Creating learning curve with {len(train_data)} training and {len(val_data)} validation points")
+ learning_fig = plot_learning_curves(train_data, val_data)
  except Exception as e:
  import traceback
  print(f"Error creating learning curve plot: {e}")
@@ -249,16 +257,41 @@ def run_analysis(data_dir="data",
  plt.axis('off')
  plt.tight_layout()

+ # Check if we should use strict real data mode
+ use_strict_real_data = PREDICTION_CONFIG.get('strict_real_data', False)
+ no_mock_data = PREDICTION_CONFIG.get('no_mock_data', False)
+
+ if use_strict_real_data or no_mock_data:
+ print("Using strict real data mode - only including real data in results")
+ # Only include figures if they contain real data
+ figures = {}
+ if hasattr(vae, 'train_losses') and len(vae.train_losses) > 0:
+ figures['learning_curves'] = learning_fig
+ print("Including real learning curves")
+ else:
+ print("WARNING: No real learning curve data available")
+
+ # Only include FC analysis if it's based on real data
+ if len(np.array(X).shape) > 0 and len(X) > 0:
+ figures['vae'] = fc_fig
+ figures['fc_analysis'] = fc_fig
+ print("Including real FC analysis")
+ else:
+ print("WARNING: No real FC data available")
+ else:
+ # Include all figures, even if based on synthetic data
+ figures = {
+ 'vae': fc_fig,
+ 'fc_analysis': fc_fig,
+ 'learning_curves': learning_fig
+ }
+
  # Initialize results dictionary
  results = {
  'vae': vae,
  'latents': latents,
  'demographics': demographics,
- 'figures': {
- 'vae': fc_fig,
- 'fc_analysis': fc_fig,
- 'learning_curves': learning_fig
- }
+ 'figures': figures
  }

  # Add reconstructed and generated FC if available
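
Note: the new loss-selection logic prefers histories recorded on the VAE object, then the values returned by fit(), and otherwise falls back to an empty list plus a warning. The helper below is an illustrative condensation of that chain, not code from the commit:

def collect_real_losses(vae, train_losses=None, val_losses=None):
    """Return (train, val) loss histories, using only real recorded data."""
    def pick(stored, passed, label):
        if stored:            # real history stored on the VAE object
            return list(stored)
        if passed:            # real history returned by fit()
            return list(passed)
        print(f"WARNING: No real {label} loss data found")
        return []

    train = pick(getattr(vae, 'train_losses', None), train_losses, 'training')
    val = pick(getattr(vae, 'val_losses', None), val_losses, 'validation')
    return train, val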
test_learning.png ADDED
test_loading.py ADDED
@@ -0,0 +1,62 @@
+ import os
+ # Set Huggingface cache directory to avoid permission issues
+ os.environ['TRANSFORMERS_CACHE'] = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'hf_cache')
+ os.makedirs(os.environ['TRANSFORMERS_CACHE'], exist_ok=True)
+ os.makedirs('models', exist_ok=True)
+
+ import numpy as np
+ import torch
+ from vae_model import DemoVAE
+ import matplotlib.pyplot as plt
+ from visualization import plot_learning_curves
+
+ print("Creating synthetic test data...")
+ # Create small synthetic dataset with only 5 samples
+ input_dim = 100
+ n_samples = 5
+ X = np.random.randn(n_samples, input_dim)
+ demo_data = [
+ np.random.normal(60, 10, n_samples), # age
+ np.random.choice(['M', 'F'], n_samples), # sex
+ np.random.normal(24, 12, n_samples), # months post stroke
+ np.random.normal(50, 15, n_samples) # WAB score
+ ]
+ demo_types = ['continuous', 'categorical', 'continuous', 'continuous']
+
+ print("Testing DemoVAE initialization...")
+ # Initialize with nepochs=3 for fast testing
+ vae = DemoVAE(latent_dim=16, nepochs=3, bsize=5)
+
+ print("Testing DemoVAE fit method...")
+ # Fit model
+ train_losses, val_losses = vae.fit(X, demo_data, demo_types)
+
+ print(f"Train losses shape: {len(train_losses)}")
+ print(f"Val losses shape: {len(val_losses)}")
+
+ print("Testing get_latents method...")
+ # Test get_latents
+ latents = vae.get_latents(X)
+ print(f"Latents shape: {latents.shape}")
+
+ print("Testing encode method...")
+ # Test encode
+ latents2 = vae.encode(X)
+ print(f"Latents from encode shape: {latents2.shape}")
+
+ print("Testing model save...")
+ # Save model
+ vae.save('models/test_vae.pt')
+
+ print("Testing model load...")
+ # Load model
+ vae2 = DemoVAE()
+ vae2.load('models/test_vae.pt')
+
+ print("Testing learning curve plotting...")
+ # Test learning curve plotting
+ fig = plot_learning_curves(vae2.train_losses, vae2.val_losses)
+ plt.savefig('test_learning.png')
+ print("Learning curve saved to test_learning.png")
+
+ print("All tests passed!")
vae_model.py CHANGED
@@ -234,6 +234,10 @@ class DemoVAE(BaseEstimator):
  print(f"Returning fallback output with shape: {fallback.shape}")
  return fallback

+ def encode(self, x):
+ """Alias for get_latents method - to provide compatibility with some interfaces"""
+ return self.get_latents(x)
+
  def get_latents(self, x):
  # Set model to evaluation mode
  self.vae.eval()
visualization.py CHANGED
@@ -397,14 +397,22 @@ def plot_treatment_trajectory(current_score, predicted_score, months_post_stroke
  def plot_learning_curves(train_losses, val_losses):
  """Plot VAE learning curves with enhanced visualization"""
  try:
- # Handle empty or None inputs
- if not train_losses or train_losses is None:
- print("WARNING: No training loss data provided")
- train_losses = [0.0]
+ # Handle empty or None inputs - only use real data
+ if not train_losses or train_losses is None or len(train_losses) == 0:
+ print("WARNING: No real training loss data provided")
+ # Create placeholder figure with warning message
+ fig = plt.figure(figsize=(10, 6))
+ plt.text(0.5, 0.5, "No real training data available",
+ ha='center', va='center', transform=plt.gca().transAxes,
+ fontsize=14, color='darkred')
+ plt.axis('off')
+ plt.tight_layout()
+ return fig

- if not val_losses or val_losses is None:
- print("WARNING: No validation loss data provided")
- val_losses = [0.0]
+ if not val_losses or val_losses is None or len(val_losses) == 0:
+ print("WARNING: No real validation loss data provided. Using training data only.")
+ # Use training data for both
+ val_losses = train_losses

  # Convert to numpy arrays for safe handling
  train_np = np.array(train_losses)
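
Note: with the revised guard, an empty history yields a placeholder figure instead of a flat zero curve, and a missing validation series reuses the training series. A short usage sketch:

from visualization import plot_learning_curves

# No history at all: returns the "No real training data available" placeholder.
fig_empty = plot_learning_curves([], [])
fig_empty.savefig('learning_curves_empty.png')

# Training losses only: the validation curve falls back to the training values.
fig_train_only = plot_learning_curves([3.2, 2.1, 1.7, 1.5], [])
fig_train_only.savefig('learning_curves_train_only.png')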