import time
from functools import wraps


def reconstruct_dataframe(alpha_low, alpha_med, alpha_high, original_df):
    """
    Build a new DataFrame in which three placeholder values are swapped for
    the supplied alpha parameters.

    Mapping applied to every cell:
    0.00191 -> alpha_low, 0.0038 -> alpha_med, 0.00767 -> alpha_high.

    Parameters:
    - alpha_low: Replacement for cells equal to 0.00191.
    - alpha_med: Replacement for cells equal to 0.0038.
    - alpha_high: Replacement for cells equal to 0.00767.
    - original_df: DataFrame holding the placeholder values; left unmodified.

    Returns:
    - A new DataFrame with the placeholders replaced.

    Raises:
    - ValueError: If any of the three placeholder values does not occur at
      least once in original_df.
    """
    value_to_alpha = {
        0.00191: alpha_low,
        0.00767: alpha_high,
        0.0038: alpha_med,
    }

    # Locate every placeholder in the ORIGINAL frame before changing anything.
    # Replacing one value at a time on the evolving frame would re-replace
    # cells whenever a new alpha happens to equal another placeholder
    # (e.g. alpha_low == 0.0038).
    hit_masks = {}
    for original_value in value_to_alpha:
        hits = original_df == original_value
        if not hits.any().any():
            raise ValueError(f"Value {original_value} not found in the input DataFrame.")
        hit_masks[original_value] = hits

    new_df = original_df.copy()
    for original_value, new_value in value_to_alpha.items():
        new_df = new_df.mask(hit_masks[original_value], new_value)
    return new_df


def preprocess_dataFrame(df, headerRow_idx=0, numRowsStart_idx=None, numRowsEnd_idx=None,
                         numColsStart_idx=None, numColsEnd_idx=None, rowNames_idx=None):
    """
    Promote one row to column headers (and optionally one column to row
    labels), then slice the frame by position.

    Parameters:
    - df: Input DataFrame; it is not modified.
    - headerRow_idx: Position of the row whose values become the column names.
    - numRowsStart_idx, numRowsEnd_idx: Positional row slice bounds (None = open).
    - numColsStart_idx, numColsEnd_idx: Positional column slice bounds (None = open).
    - rowNames_idx: Position of the column whose values become the index;
      None keeps the existing index.

    Returns:
    - The relabelled DataFrame restricted to the requested row/column slice.
    """
    # Work on a copy so the caller's frame keeps its own columns and index.
    labelled = df.copy()
    labelled.columns = labelled.iloc[headerRow_idx]
    if rowNames_idx is not None:
        labelled.index = labelled.iloc[:, rowNames_idx]
    return labelled.iloc[numRowsStart_idx:numRowsEnd_idx, numColsStart_idx:numColsEnd_idx]


def timeit(f):
    """
    Decorator that prints how long each call to ``f`` took.

    Parameters:
    - f: Callable to wrap.

    Returns:
    - A wrapper that calls f, prints "func:<name> took: <seconds> sec" and
      returns f's result unchanged.
    """
    @wraps(f)
    def timed(*args, **kw):
        ts = time.perf_counter()
        result = f(*args, **kw)
        te = time.perf_counter()
        # Arguments are intentionally not printed; the format string must
        # therefore contain exactly two placeholders.
        print(f"func:{f.__name__!r} took: {te - ts:2.4f} sec")
        return result
    return timed


def timing_decorator(func):
    """
    Decorator that prints the duration of each call and the local wall-clock
    time at which it finished.

    Parameters:
    - func: Callable to wrap.

    Returns:
    - A wrapper that calls func, prints the timing line and returns func's
      result unchanged.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        start_time = time.time()
        result = func(*args, **kwargs)
        end_time = time.time()
        duration = end_time - start_time
        timestamp = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
        print(f"{func.__name__} took {duration:.4f} seconds. Finished at {timestamp}")
        return result
    return wrapper


def compare_dataframes(df1, df2, decimals=8):
    """
    Compare two DataFrames after casting every column to float and rounding,
    and print a short report.

    Parameters:
    - df1: First DataFrame (reported as "Original").
    - df2: Second DataFrame (reported as "Numba" / "New method").
    - decimals: Number of decimal places kept before comparing.

    Returns:
    - True if the rounded frames are equal according to DataFrame.equals,
      otherwise False.
    """
    rounded_df1 = df1.astype(float).round(decimals)
    rounded_df2 = df2.astype(float).round(decimals)
    are_equal = rounded_df1.equals(rounded_df2)

    print("Both methods are equal:", are_equal)
    print("Numba shape:", df2.shape)
    print("Original shape:", df1.shape)
    print("======== ORIGINAL OUTPUT (first item in output list, head() for the first 5 columns)")
    print(df1.iloc[0:5].head(2))
    print("======== New method OUTPUT (first item in output list, head() for the first 5 columns)")
    print(df2.iloc[0:5].head(2))
    return are_equal


def align_dataframes(df1, df2, key):
    """
    Restrict two DataFrames to the rows whose identifiers occur in both.

    Identifiers are taken from the index of df1 and from column ``key`` of df2.

    Parameters:
    - df1: First DataFrame; matched on its index.
    - df2: Second DataFrame; matched on column ``key``.
    - key: Name of the column in df2 holding the identifiers.

    Returns:
    - df1_aligned, df2_aligned: df1 limited to the shared identifiers, and the
      matching rows of df2 re-indexed by ``key`` (the column is kept).
      df2_aligned keeps df2's own row order; the two results are not
      re-sorted against each other.
    """
    common_ids = df1.index.intersection(df2[key])
    df1_aligned = df1.loc[common_ids]
    df2_aligned = df2[df2[key].isin(common_ids)].set_index(key, drop=False)
    return df1_aligned, df2_aligned