serJD committed on
Commit
ce3dfc6
1 Parent(s): 30b39ff

Initial commit

.DS_Store ADDED
Binary file (6.15 kB).
 
Trip Generation.xlsx ADDED
Binary file (300 kB).
 
__pycache__/tripGenerationFunc.cpython-312.pyc ADDED
Binary file (33.6 kB).
 
config.json ADDED
@@ -0,0 +1,64 @@
+ {
+ "STREAM_ID": "ebcfc50abe",
+ "BRANCH_NAME_LAND_USES": "graph_geometry/activity_nodes_with_land_use",
+ "BRANCH_NAME_DISTANCE_MATRIX": "graph_geometry/distance_matrix",
+ "BRANCH_NAME_METRIC_DIST_MATRIX": "graph_geometry/metric_matrix",
+ "TARGET_BRANCH_TM": "graph_geometry/trip_matrix",
+ "distanceMatrixName": "activity_node+distance_matrix_ped_mm_art_noEntr",
+ "SCENARIO_NAME": "PED-MM-DRT_noEntryTime",
+ "TARGET_TRIP_RATE": 3.2,
+ "SCALING_FACTOR": 1,
+ "ALPHA_LOW": 0.0023,
+ "ALPHA_MED": 0.0038,
+ "ALPHA_HIGH": 0.0076,
+ "ALPHA_UNIFORM": 0.0038,
+ "ALPHA": 0.00066,
+ "XLS_FILE_PATH": "Trip Generation.xlsx",
+
+ "F_VALUES_MANUAL": {
+ "activity_node+distance_matrix_ped_mm_art_noEntr": 0,
+ "activity_node+distance_matrix_ped_mm_noEntr": 0.55,
+ "activity_node+distance_matrix_ped_noEntr": -0.08,
+ "activity_node+distance_matrix_ped_art_noEntr": -0.5
+ },
+ "distance_matrices_of_interest": [
+ "activity_node+distance_matrix_ped_mm_art_noEntr",
+ "activity_node+distance_matrix_ped_mm_noEntr",
+ "activity_node+distance_matrix_ped_art_noEntr",
+ "activity_node+distance_matrix_ped_noEntr"
+ ],
+ "metric_matrices_of_interest": [
+ "activity_node+metric_matrix_ped_mm_art",
+ "activity_node+metric_matrix_ped_mm",
+ "activity_node+metric_matrix_ped_art",
+ "activity_node+metric_matrix_ped"
+ ],
+ "redistributeTrips": [
+ {
+ "from": "activity_node+distance_matrix_ped_mm_art_noEntr",
+ "to": [
+ "activity_node+distance_matrix_ped_mm_noEntr",
+ "activity_node+distance_matrix_ped_art_noEntr",
+ "activity_node+distance_matrix_ped_noEntr"
+ ]
+ },
+ {
+ "from": "activity_node+distance_matrix_ped_art_noEntr",
+ "to": [
+ "activity_node+distance_matrix_ped_noEntr"
+ ]
+ },
+ {
+ "from": "activity_node+distance_matrix_ped_mm_noEntr",
+ "to": [
+ "activity_node+distance_matrix_ped_noEntr"
+ ]
+ }
+ ],
+ "DISTANCE_BRACKETS": [
+ 800,
+ 2400,
+ 4800
+ ]
+ }
+
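Note: the scripts below pull these keys into module scope with locals().update(config). A minimal, more explicit sketch of the same loading step (key names taken from the config above, everything else illustrative, not part of the commit) could look like this:

import json
import os

# Minimal sketch: read config.json and bind the keys explicitly
# instead of relying on locals().update(config).
config_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "config.json")
with open(config_path, "r") as f:
    cfg = json.load(f)

STREAM_ID = cfg["STREAM_ID"]                  # key names as defined in the config above
TARGET_TRIP_RATE = cfg["TARGET_TRIP_RATE"]
DISTANCE_BRACKETS = cfg["DISTANCE_BRACKETS"]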
main copy.py ADDED
@@ -0,0 +1,333 @@
1
+ import os
2
+ import json
3
+ import pandas as pd
4
+ import copy
5
+ from functools import wraps
6
+ from specklepy.api.client import SpeckleClient
7
+ from tripGenerationFunc import *
8
+ import speckle_utils
9
+ import data_utils
10
+
11
+
12
+ # Load and parse config.json
13
+ current_directory = os.path.dirname(os.path.abspath(__file__))
14
+ # Path to the config.json file
15
+ config_file_path = os.path.join(current_directory, "config.json")
16
+
17
+
18
+ #def runAll():
19
+
20
+ speckle_token = os.environ.get("SPECKLE_TOKEN")
21
+ # Check if the config.json file exists
22
+ if os.path.exists(config_file_path):
23
+ # Load the JSON data from config.json
24
+ with open(config_file_path, 'r') as f:
25
+ config = json.load(f)
26
+
27
+ # Convert to Python variables with the same names as the keys in the JSON
28
+ locals().update(config)
29
+ print("variables from json")
30
+ # Now you can access the variables directly
31
+ print(STREAM_ID)
32
+ print(BRANCH_NAME_LAND_USES)
33
+ print(TARGET_TRIP_RATE)
34
+ print(ALPHA_LOW)
35
+ print(F_VALUES_MANUAL)
36
+ print(distance_matrices_of_interest)
37
+ print(redistributeTrips)
38
+ print(DISTANCE_BRACKETS)
39
+ print(XLS_FILE_PATH)
40
+ print("==================")
41
+ else:
42
+ print("Error: config.json file not found in the current directory.")
43
+
44
+
45
+
46
+ xls_file_path = os.path.join(current_directory, XLS_FILE_PATH)
47
+ print("full path", xls_file_path)
48
+ # fetch speckle data
49
+ CLIENT = SpeckleClient(host="https://speckle.xyz/")
50
+ CLIENT.authenticate_with_token(token=speckle_token)  # use the SPECKLE_TOKEN env var; never hard-code secrets
51
+
52
+ # get land use stream
53
+ stream_land_use = speckle_utils.getSpeckleStream(STREAM_ID,
54
+ BRANCH_NAME_LAND_USES,
55
+ CLIENT,
56
+ commit_id = "")
57
+ # navigate to list with speckle objects of interest
58
+ stream_data = stream_land_use["@Data"]["@{0}"]
59
+
60
+ # transform stream_data to dataframe (create a backup copy of this dataframe)
61
+ df_speckle_lu = speckle_utils.get_dataframe(stream_data, return_original_df=False)
62
+ df_main = df_speckle_lu.copy()
63
+
64
+ # set index column
65
+ df_main = df_main.set_index("ids", drop=False)
66
+
67
+
68
+ # get distance matrix stream
69
+ stream_distance_matrice = speckle_utils.getSpeckleStream(STREAM_ID,
70
+ BRANCH_NAME_DISTANCE_MATRIX,
71
+ CLIENT,
72
+ commit_id = "")
73
+
74
+ # navigate to list with speckle objects of interest
75
+ distance_matrices = {}
76
+ for distM in stream_distance_matrice["@Data"]['@{0}']:
77
+ for kk in distM.__dict__.keys():
78
+ try:
79
+ if kk.split("+")[1].startswith("distance_matrix"):
80
+ distance_matrix_dict = json.loads(distM[kk])
81
+ origin_ids = distance_matrix_dict["origin_uuid"]
82
+ destination_ids = distance_matrix_dict["destination_uuid"]
83
+ distance_matrix = distance_matrix_dict["matrix"]
84
+ # Convert the distance matrix to a DataFrame
85
+ df_distances = pd.DataFrame(distance_matrix, index=origin_ids, columns=destination_ids)
86
+
87
+ # attach origin/destination ids as index & column names
88
+ #distance_matrices[kk] = dist_m_csv[kk]
89
+ distance_matrices[kk] = df_distances
90
+
91
+ except Exception:  # skip attributes that are not serialized distance matrices
92
+ pass
93
+
94
+
95
+ # get metric matrix stream
96
+ stream_metric_matrice = speckle_utils.getSpeckleStream(STREAM_ID,
97
+ BRANCH_NAME_METRIC_DIST_MATRIX,
98
+ CLIENT,
99
+ commit_id = "")
100
+
101
+
102
+ # navigate to list with speckle objects of interest
103
+ metric_matrices = {}
104
+ for distM in stream_metric_matrice["@Data"]['@{0}']:
105
+ print(distM.__dict__.keys())
106
+ for kk in distM.__dict__.keys():
107
+ try:
108
+ if kk.split("+")[1].startswith("metric_matrix"):
109
+ metric_matrix_dict = json.loads(distM[kk])
110
+ origin_ids = metric_matrix_dict["origin_uuid"]
111
+ destination_ids = metric_matrix_dict["destination_uuid"]
112
+ metric_matrix = metric_matrix_dict["matrix"]
113
+ # Convert the distance matrix to a DataFrame
114
+ df_metric_dist = pd.DataFrame(metric_matrix, index=origin_ids, columns=destination_ids)
115
+ metric_matrices[kk] = df_metric_dist*10  # scale by 10 to compensate for a Speckle unit issue
116
+
117
+ print("metric_matrix_dict", metric_matrix_dict.keys())
118
+ except Exception:  # skip attributes that are not serialized metric matrices
119
+ pass
120
+
121
+ metric_matrices = extract_distance_matrices(stream_metric_matrice, metric_matrices_of_interest)
122
+
123
+
124
+ sourceCommits = {
125
+ "landuseCommitID": stream_land_use.id,
126
+ "distanceMatrixCommitID": stream_distance_matrice.id,
127
+ "metricMatrixCommitID": stream_metric_matrice.id
128
+ }
129
+
130
+
131
+ # READ XLS FILE ======================================
132
+ # Read Excel file into Pandas DataFrame
133
+ #Production
134
+ # Load Excel file separately
135
+ #xls_file_path = os.path.join(current_directory, XLS_FILE_PATH)
136
+ if os.path.exists(xls_file_path):
137
+ # Production
138
+ df_production = pd.read_excel(xls_file_path, sheet_name='Production')
139
+ df_production_transposed = df_production.T
140
+ df_production = preprocess_dataFrame(df_production, headerRow_idx=2, numRowsStart_idx=3)
141
+ df_production_transposed = preprocess_dataFrame(df_production_transposed, headerRow_idx=0, numRowsStart_idx=4,
142
+ numColsStart_idx=4, rowNames_idx=2)
143
+
144
+ # Attraction
145
+ df_attraction = pd.read_excel(xls_file_path, sheet_name='Attraction')
146
+ df_attraction = preprocess_dataFrame(df_attraction, headerRow_idx=0, numRowsStart_idx=2)
147
+
148
+ # Distribution_Matrix
149
+ df_distributionMatrix = pd.read_excel(xls_file_path, sheet_name='Distribution_Matrix')
150
+ df_distributionMatrix = preprocess_dataFrame(df_distributionMatrix, headerRow_idx=0, numRowsStart_idx=2,
151
+ numRowsEnd_idx=None, numColsStart_idx=2, numColsEnd_idx=None,
152
+ rowNames_idx=0)
153
+
154
+ # Alphas
155
+ df_alphas = pd.read_excel(xls_file_path, sheet_name='Alphas')
156
+ df_alphas.columns = df_alphas.iloc[1]
157
+ df_alphas = df_alphas.iloc[0, 2:]
158
+
159
+ # Land use
160
+ df_lu = pd.read_excel(xls_file_path, sheet_name='Example_Land_Use')
161
+ df_lu = preprocess_dataFrame(df_lu, headerRow_idx=0, numRowsStart_idx=1)
162
+ df_lu["nameCombined"] = df_lu.iloc[:, 1].astype(str) + "+" + df_lu.iloc[:, 0].astype(str)
163
+
164
+ # Distance Matrix
165
+ df_distMatrix = pd.read_excel(xls_file_path, sheet_name='Example_Distance_Matrix')
166
+ df_distMatrix = preprocess_dataFrame(df_distMatrix, headerRow_idx=0, numRowsStart_idx=1, numRowsEnd_idx=None,
167
+ numColsStart_idx=1, numColsEnd_idx=None, rowNames_idx=0)
168
+ else:
169
+ print("Error: Excel file specified in config.json not found.")
170
+
171
+
172
+
173
+ # Land use structure =======
174
+ # THIS IS THE DISTANCE MATRIX THATS USED DOWN THE ROAD
175
+ df_distances_aligned, df_lu_stream_aligned = align_dataframes(distance_matrices[distanceMatrixName], df_main, 'ids')
176
+
177
+ # Create a df with land uses
178
+ lu_cols = [col for col in df_lu_stream_aligned.columns if col.startswith("lu+")]
179
+ df_lu_stream = df_lu_stream_aligned[lu_cols]
180
+
181
+ # Remove "lu+" from the beginning of column names
182
+ df_lu_stream.columns = df_lu_stream.columns.str.replace(r'^lu\+', '', regex=True)  # lstrip('lu+') would strip any leading 'l', 'u' or '+' characters, not the prefix
183
+ df_lu_stream = df_lu_stream.T
184
+
185
+ df_lu_stream_t = df_lu_stream.T
186
+
187
+ df_lu_stream_with_nameLu_column = df_lu_stream.reset_index(drop=False).rename(columns={'index': 'nameLu'})
188
+
189
+ #---
190
+ df_lu_names_xlsx = pd.concat([df_lu.iloc[:, 0:2], df_lu.iloc[:, -1]], axis=1)
191
+ df_lu_names_xlsx.index = df_lu_names_xlsx.iloc[:, 1]
192
+ column_names = ['nameTripType', 'nameLu', 'nameCombined']
193
+ df_lu_names_xlsx.columns = column_names
194
+ print(f"df_lu_names_xlsx shape: {df_lu_names_xlsx.shape}")
195
+ df_lu_names_xlsx.head()
196
+
197
+ #--
198
+
199
+ # Merge DataFrames using an outer join
200
+ merged_df = pd.merge(df_lu_stream_with_nameLu_column, df_lu_names_xlsx, on='nameLu', how='outer')
201
+
202
+ # Get the unique names and their counts from df_lu_names_xlsx
203
+ name_counts = df_lu_names_xlsx['nameLu'].value_counts()
204
+ #print(name_counts)
205
+
206
+ # Identify names in df_lu_stream_with_nameLu_column that are not in df_lu_names_xlsx
207
+ missing_names = df_lu_stream_with_nameLu_column.loc[~df_lu_stream_with_nameLu_column['nameLu'].isin(df_lu_names_xlsx['nameLu'])]
208
+
209
+ # Append missing rows to df_lu_stream_with_nameLu_column
210
+ df_lu_stream_duplicated = pd.concat([merged_df, missing_names], ignore_index=True)
211
+
212
+
213
+ #--
214
+ # Find names in df_lu_names_xlsx that are not in df_lu_stream_with_nameLu_column
215
+ missing_names = df_lu_names_xlsx.loc[~df_lu_names_xlsx['nameLu'].isin(df_lu_stream_with_nameLu_column['nameLu'])]
216
+
217
+ #--
218
+ # print existing names (?)
219
+ df_lu_names_sorted = df_lu_names_xlsx.sort_values(by='nameLu')
220
+ df_lu_stream_duplicated_sorted = df_lu_stream_duplicated.sort_values(by='nameLu')
221
+ #--
222
+ # Merge DataFrames to get the order of names
223
+ merged_order = pd.merge(df_lu_names_xlsx[['nameCombined']], df_lu_stream_duplicated[['nameCombined']], on='nameCombined', how='inner')
224
+
225
+ # Sort df_lu_stream_duplicated based on the order of names in df_lu_names_xlsx
226
+ df_lu_stream_sorted = df_lu_stream_duplicated.sort_values(by='nameCombined', key=lambda x: pd.Categorical(x, categories=merged_order['nameCombined'], ordered=True))
227
+
228
+ # Reorganize columns
229
+ column_order = ['nameTripType', 'nameCombined'] + [col for col in df_lu_stream_sorted.columns if col not in ['nameTripType', 'nameCombined']]
230
+
231
+ # Create a new DataFrame with the desired column order
232
+ df_lu_stream_reordered = df_lu_stream_sorted[column_order]
233
+
234
+ df_lu_stream_reordered_t = df_lu_stream_reordered.T
235
+
236
+ #--
237
+ df_lu_stream_with_index = df_lu_stream_reordered_t.reset_index(drop=False).rename(columns={'index': 'ids'})
238
+ df_lu_stream_with_index.index = df_lu_stream_reordered_t.index
239
+
240
+ df_lu_num_t_index = df_lu_stream_with_index.iloc[3:]
241
+
242
+ df_distances_aligned_index = df_distances_aligned.reset_index(drop=False).rename(columns={'index': 'ids'})
243
+ df_distances_aligned_index.index = df_distances_aligned.index
244
+
245
+ df_lu_namesCombined = df_lu_stream_with_index.loc["nameCombined"].iloc[1:]
246
+
247
+ # Sort df_lu_stream_with_index based on the 'ids' column in df_distances_aligned_index
248
+ df_lu_stream_sorted = df_lu_stream_with_index.sort_values(by=['ids'], key=lambda x: pd.Categorical(x, categories=df_distances_aligned_index['ids'], ordered=True))
249
+
250
+
251
+ df_lu_num = df_lu_stream_sorted.T.iloc[1:, :-3]
252
+ df_lu_num.index = df_lu_namesCombined
253
+
254
+ df_distMatrix_speckle = df_distances_aligned
255
+
256
+ df_attraction_num = df_attraction.reset_index().iloc[:-1, 6:]
257
+
258
+ # =============================================================================
259
+ # TRIP GENERATION
260
+
261
+ # ATTRACTION & PRODUCTION ======================================================
262
+ """
263
+ INPUTS
264
+ df_attraction_num
265
+ df_lu_num
266
+ df_production
267
+ df_lu
268
+ df_production_transposed
269
+ """
270
+
271
+ df_attraction_proNode_sum_total = attraction_proNode_full_iter(df_attraction_num, df_lu_num, True)
272
+
273
+ #Get the sqmProPerson
274
+ df_sqmProPerson = df_production.iloc[0, 4:].reset_index()[3]
275
+
276
+ #Get the trip rate
277
+ df_tripRate = copy.deepcopy(df_production)  # a deep copy ensures df_tripRate doesn't point to df_production
278
+ df_tripRate.index = df_tripRate.iloc[:, 0] #Set the row names
279
+ df_tripRate = df_tripRate.iloc[1:, 2]
280
+
281
+ #Numerical df from production ==============================================
282
+ df_production_num = df_production.iloc[1:, 4:]
283
+ df_production_transposed1 = df_production_num.T
284
+
285
+ df_total_trips_allNodes = production_proNode_total(df_lu,
286
+ df_sqmProPerson,
287
+ df_tripRate,
288
+ df_production_num,
289
+ df_production_transposed,
290
+ df_lu_num, printSteps=False)
291
+ # Convert data types to float
292
+ df_total_trips_allNodes = df_total_trips_allNodes.astype(float)
293
+ df_tripRate = df_tripRate.astype(float)
294
+
295
+ df_total_trips_allNodes_sumPerson = df_total_trips_allNodes.div(df_tripRate, axis=0).sum()
296
+ df_total_trips_allNodes_sumPerson_proCat = df_total_trips_allNodes.div(df_tripRate, axis=0)
297
+ df_total_trips_allNodes_sumPerson_proCat_t = df_total_trips_allNodes_sumPerson_proCat.T
298
+ df_total_trips_allNodes_sumPerson_proCat_t_sum = df_total_trips_allNodes_sumPerson_proCat_t.sum()
299
+
300
+ # get total population
301
+ total_population = df_total_trips_allNodes_sumPerson_proCat_t_sum["Tot_Res"] + df_total_trips_allNodes_sumPerson_proCat_t_sum["Tot_tou"]
302
+
303
+
304
+
305
+ # =============================================================================
306
+ distance_matrices = extract_distance_matrices(stream_distance_matrice, distance_matrices_of_interest)
307
+ metric_matrices_ = extract_distance_matrices(stream_metric_matrice, metric_matrices_of_interest)
308
+ metric_matrices = { k:v*10 for k, v in metric_matrices_.items()} # scale (speckle issue)
309
+
310
+ logs = computeTrips(
311
+ df_distributionMatrix,
312
+ df_total_trips_allNodes,
313
+ df_distMatrix_speckle,
314
+ df_alphas,
315
+ df_attraction_proNode_sum_total,
316
+ df_distances_aligned,
317
+ TARGET_TRIP_RATE,
318
+ SCALING_FACTOR,
319
+ total_population,
320
+ df_total_trips_allNodes_sumPerson_proCat_t_sum["Tot_Res"],
321
+ df_total_trips_allNodes_sumPerson_proCat_t_sum["Tot_tou"],
322
+ distance_matrices,
323
+ metric_matrices,
324
+ redistributeTrips,
325
+ DISTANCE_BRACKETS,
326
+ ALPHA_LOW, ALPHA_MED, ALPHA_HIGH, ALPHA, ALPHA_UNIFORM, F_VALUES_MANUAL,
327
+ CLIENT,
328
+ STREAM_ID,
329
+ TARGET_BRANCH_TM,
330
+ sourceCommits
331
+ )
332
+
333
+ print(logs)
main.py ADDED
@@ -0,0 +1,385 @@
1
+ import os
2
+ import json
3
+ import pandas as pd
4
+ import copy
5
+ from functools import wraps
6
+ from specklepy.api.client import SpeckleClient
7
+ from tripGenerationFunc import *
8
+ import speckle_utils
9
+ import data_utils
10
+ import gradio as gr
11
+ import requests
12
+ from huggingface_hub import webhook_endpoint, WebhookPayload
13
+ from fastapi import Request
14
+ import datetime
15
+
16
+ current_directory = os.path.dirname(os.path.abspath(__file__))
17
+ # Path to the config.json file
18
+ config_file_path = os.path.join(current_directory, "config.json")
19
+
20
+ # Check if the config.json file exists
21
+ if os.path.exists(config_file_path):
22
+ # Load the JSON data from config.json
23
+ with open(config_file_path, 'r') as f:
24
+ config = json.load(f)
25
+
26
+ # Convert to Python variables with the same names as the keys in the JSON
27
+ locals().update(config)
28
+ print("variables from json")
29
+ # Now you can access the variables directly
30
+ print(STREAM_ID)
31
+ print(BRANCH_NAME_LAND_USES)
32
+ print(TARGET_TRIP_RATE)
33
+ print(ALPHA_LOW)
34
+ print(F_VALUES_MANUAL)
35
+ print(distance_matrices_of_interest)
36
+ print(redistributeTrips)
37
+ print(DISTANCE_BRACKETS)
38
+ print(XLS_FILE_PATH)
39
+ print("==================")
40
+ else:
41
+ print("Error: config.json file not found in the current directory.")
42
+
43
+
44
+ # checks payload of webhook and runs the main code if webhook was triggered by specified stream + one of the branches
45
+ listendStreams = [STREAM_ID]
46
+ listendBranchNames = [BRANCH_NAME_LAND_USES,BRANCH_NAME_DISTANCE_MATRIX,BRANCH_NAME_METRIC_DIST_MATRIX]
47
+
48
+ @webhook_endpoint
49
+ async def update_streams(request: Request):
50
+ # Initialize flag
51
+ should_continue = False
52
+
53
+ # Read the request body as JSON
54
+ payload = await request.json()
55
+
56
+ # Check if the payload structure matches the expected format
57
+ if "event" in payload and "data" in payload["event"]:
58
+ event_data = payload["event"]["data"]
59
+
60
+ # Check if the event type is "commit_create"
61
+ if "type" in event_data and event_data["type"] == "commit_create":
62
+ # Check if the stream name matches the specified list
63
+ if "stream" in event_data and event_data["stream"] in listendStreams:
64
+ # Check if the branch name matches the specified list
65
+ if "commit" in event_data and "branchName" in event_data["commit"]:
66
+ if event_data["commit"]["branchName"] in listendBranchNames:
67
+ should_continue = True
68
+ else:
69
+ print("Branch name not found in payload.")
70
+ else:
71
+ print("Stream name not found or not in the specified list.")
72
+ else:
73
+ print("Event type is not 'commit_create'.")
74
+ else:
75
+ print("Payload structure does not match the expected format.")
76
+
77
+ # If the flag is True, continue running the main part of the code
78
+ if should_continue:
79
+ # Your main code logic goes here
80
+ runAll()
81
+ else:
82
+ print("Flag is False. Skipping further execution.")
83
+
84
+ return "Webhook processing complete."
85
+
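For reference, a payload of the following shape would pass all of the checks in update_streams() above (field names inferred from this handler, values illustrative only):

example_payload = {
    "event": {
        "data": {
            "type": "commit_create",
            "stream": "ebcfc50abe",                                       # must be in listendStreams
            "commit": {"branchName": "graph_geometry/distance_matrix"},   # must be in listendBranchNames
        }
    }
}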
86
+
87
+
88
+ def runAll():
89
+ # config.json values are loaded at module level above
90
+
91
+ speckle_token = os.environ.get("SPECKLE_TOKEN")
92
+
93
+
94
+
95
+
96
+
97
+ xls_file_path = os.path.join(current_directory, XLS_FILE_PATH)
98
+ print("full path", xls_file_path)
99
+ # fetch speckle data
100
+ CLIENT = SpeckleClient(host="https://speckle.xyz/")
101
+ CLIENT.authenticate_with_token(token=speckle_token)  # use the SPECKLE_TOKEN env var; never hard-code secrets
102
+
103
+ # get land use stream
104
+ stream_land_use = speckle_utils.getSpeckleStream(STREAM_ID,
105
+ BRANCH_NAME_LAND_USES,
106
+ CLIENT,
107
+ commit_id = "")
108
+ # navigate to list with speckle objects of interest
109
+ stream_data = stream_land_use["@Data"]["@{0}"]
110
+
111
+ # transform stream_data to dataframe (create a backup copy of this dataframe)
112
+ df_speckle_lu = speckle_utils.get_dataframe(stream_data, return_original_df=False)
113
+ df_main = df_speckle_lu.copy()
114
+
115
+ # set index column
116
+ df_main = df_main.set_index("ids", drop=False)
117
+
118
+
119
+ # get distance matrix stream
120
+ stream_distance_matrice = speckle_utils.getSpeckleStream(STREAM_ID,
121
+ BRANCH_NAME_DISTANCE_MATRIX,
122
+ CLIENT,
123
+ commit_id = "")
124
+
125
+ # navigate to list with speckle objects of interest
126
+ distance_matrices = {}
127
+ for distM in stream_distance_matrice["@Data"]['@{0}']:
128
+ for kk in distM.__dict__.keys():
129
+ try:
130
+ if kk.split("+")[1].startswith("distance_matrix"):
131
+ distance_matrix_dict = json.loads(distM[kk])
132
+ origin_ids = distance_matrix_dict["origin_uuid"]
133
+ destination_ids = distance_matrix_dict["destination_uuid"]
134
+ distance_matrix = distance_matrix_dict["matrix"]
135
+ # Convert the distance matrix to a DataFrame
136
+ df_distances = pd.DataFrame(distance_matrix, index=origin_ids, columns=destination_ids)
137
+
138
+ # attach origin/destination ids as index & column names
139
+ #distance_matrices[kk] = dist_m_csv[kk]
140
+ distance_matrices[kk] = df_distances
141
+
142
+ except Exception:  # skip attributes that are not serialized distance matrices
143
+ pass
144
+
145
+
146
+ # get metric matrix stream
147
+ stream_metric_matrice = speckle_utils.getSpeckleStream(STREAM_ID,
148
+ BRANCH_NAME_METRIC_DIST_MATRIX,
149
+ CLIENT,
150
+ commit_id = "")
151
+
152
+
153
+ # navigate to list with speckle objects of interest
154
+ metric_matrices = {}
155
+ for distM in stream_metric_matrice["@Data"]['@{0}']:
156
+ print(distM.__dict__.keys())
157
+ for kk in distM.__dict__.keys():
158
+ try:
159
+ if kk.split("+")[1].startswith("metric_matrix"):
160
+ metric_matrix_dict = json.loads(distM[kk])
161
+ origin_ids = metric_matrix_dict["origin_uuid"]
162
+ destination_ids = metric_matrix_dict["destination_uuid"]
163
+ metric_matrix = metric_matrix_dict["matrix"]
164
+ # Convert the distance matrix to a DataFrame
165
+ df_metric_dist = pd.DataFrame(metric_matrix, index=origin_ids, columns=destination_ids)
166
+ metric_matrices[kk] = df_metric_dist*10  # scale by 10 to compensate for a Speckle unit issue
167
+
168
+ print("metric_matrix_dict", metric_matrix_dict.keys())
169
+ except Exception:  # skip attributes that are not serialized metric matrices
170
+ pass
171
+
172
+ metric_matrices = extract_distance_matrices(stream_metric_matrice, metric_matrices_of_interest)
173
+
174
+
175
+ sourceCommits = {
176
+ "landuseCommitID": stream_land_use.id,
177
+ "distanceMatrixCommitID": stream_distance_matrice.id,
178
+ "metricMatrixCommitID": stream_metric_matrice.id
179
+ }
180
+
181
+
182
+ # READ XLS FILE ======================================
183
+ # Read Excel file into Pandas DataFrame
184
+ #Production
185
+ # Load Excel file separately
186
+ #xls_file_path = os.path.join(current_directory, XLS_FILE_PATH)
187
+ if os.path.exists(xls_file_path):
188
+ # Production
189
+ df_production = pd.read_excel(xls_file_path, sheet_name='Production')
190
+ df_production_transposed = df_production.T
191
+ df_production = preprocess_dataFrame(df_production, headerRow_idx=2, numRowsStart_idx=3)
192
+ df_production_transposed = preprocess_dataFrame(df_production_transposed, headerRow_idx=0, numRowsStart_idx=4,
193
+ numColsStart_idx=4, rowNames_idx=2)
194
+
195
+ # Attraction
196
+ df_attraction = pd.read_excel(xls_file_path, sheet_name='Attraction')
197
+ df_attraction = preprocess_dataFrame(df_attraction, headerRow_idx=0, numRowsStart_idx=2)
198
+
199
+ # Distribution_Matrix
200
+ df_distributionMatrix = pd.read_excel(xls_file_path, sheet_name='Distribution_Matrix')
201
+ df_distributionMatrix = preprocess_dataFrame(df_distributionMatrix, headerRow_idx=0, numRowsStart_idx=2,
202
+ numRowsEnd_idx=None, numColsStart_idx=2, numColsEnd_idx=None,
203
+ rowNames_idx=0)
204
+
205
+ # Alphas
206
+ df_alphas = pd.read_excel(xls_file_path, sheet_name='Alphas')
207
+ df_alphas.columns = df_alphas.iloc[1]
208
+ df_alphas = df_alphas.iloc[0, 2:]
209
+
210
+ # Land use
211
+ df_lu = pd.read_excel(xls_file_path, sheet_name='Example_Land_Use')
212
+ df_lu = preprocess_dataFrame(df_lu, headerRow_idx=0, numRowsStart_idx=1)
213
+ df_lu["nameCombined"] = df_lu.iloc[:, 1].astype(str) + "+" + df_lu.iloc[:, 0].astype(str)
214
+
215
+ # Distance Matrix
216
+ df_distMatrix = pd.read_excel(xls_file_path, sheet_name='Example_Distance_Matrix')
217
+ df_distMatrix = preprocess_dataFrame(df_distMatrix, headerRow_idx=0, numRowsStart_idx=1, numRowsEnd_idx=None,
218
+ numColsStart_idx=1, numColsEnd_idx=None, rowNames_idx=0)
219
+ else:
220
+ print("Error: Excel file specified in config.json not found.")
221
+
222
+
223
+
224
+ # Land use structure =======
225
+ # THIS IS THE DISTANCE MATRIX THATS USED DOWN THE ROAD
226
+ df_distances_aligned, df_lu_stream_aligned = align_dataframes(distance_matrices[distanceMatrixName], df_main, 'ids')
227
+
228
+ # Create a df with land uses
229
+ lu_cols = [col for col in df_lu_stream_aligned.columns if col.startswith("lu+")]
230
+ df_lu_stream = df_lu_stream_aligned[lu_cols]
231
+
232
+ # Remove "lu+" from the beginning of column names
233
+ df_lu_stream.columns = df_lu_stream.columns.str.replace(r'^lu\+', '', regex=True)  # lstrip('lu+') would strip any leading 'l', 'u' or '+' characters, not the prefix
234
+ df_lu_stream = df_lu_stream.T
235
+
236
+ df_lu_stream_t = df_lu_stream.T
237
+
238
+ df_lu_stream_with_nameLu_column = df_lu_stream.reset_index(drop=False).rename(columns={'index': 'nameLu'})
239
+
240
+ #---
241
+ df_lu_names_xlsx = pd.concat([df_lu.iloc[:, 0:2], df_lu.iloc[:, -1]], axis=1)
242
+ df_lu_names_xlsx.index = df_lu_names_xlsx.iloc[:, 1]
243
+ column_names = ['nameTripType', 'nameLu', 'nameCombined']
244
+ df_lu_names_xlsx.columns = column_names
245
+ print(f"df_lu_names_xlsx shape: {df_lu_names_xlsx.shape}")
246
+ df_lu_names_xlsx.head()
247
+
248
+ #--
249
+
250
+ # Merge DataFrames using an outer join
251
+ merged_df = pd.merge(df_lu_stream_with_nameLu_column, df_lu_names_xlsx, on='nameLu', how='outer')
252
+
253
+ # Get the unique names and their counts from df_lu_names_xlsx
254
+ name_counts = df_lu_names_xlsx['nameLu'].value_counts()
255
+ #print(name_counts)
256
+
257
+ # Identify names in df_lu_stream_with_nameLu_column that are not in df_lu_names_xlsx
258
+ missing_names = df_lu_stream_with_nameLu_column.loc[~df_lu_stream_with_nameLu_column['nameLu'].isin(df_lu_names_xlsx['nameLu'])]
259
+
260
+ # Append missing rows to df_lu_stream_with_nameLu_column
261
+ df_lu_stream_duplicated = pd.concat([merged_df, missing_names], ignore_index=True)
262
+
263
+
264
+ #--
265
+ # Find names in df_lu_names_xlsx that are not in df_lu_stream_with_nameLu_column
266
+ missing_names = df_lu_names_xlsx.loc[~df_lu_names_xlsx['nameLu'].isin(df_lu_stream_with_nameLu_column['nameLu'])]
267
+
268
+ #--
269
+ # print existing names (?)
270
+ df_lu_names_sorted = df_lu_names_xlsx.sort_values(by='nameLu')
271
+ df_lu_stream_duplicated_sorted = df_lu_stream_duplicated.sort_values(by='nameLu')
272
+ #--
273
+ # Merge DataFrames to get the order of names
274
+ merged_order = pd.merge(df_lu_names_xlsx[['nameCombined']], df_lu_stream_duplicated[['nameCombined']], on='nameCombined', how='inner')
275
+
276
+ # Sort df_lu_stream_duplicated based on the order of names in df_lu_names_xlsx
277
+ df_lu_stream_sorted = df_lu_stream_duplicated.sort_values(by='nameCombined', key=lambda x: pd.Categorical(x, categories=merged_order['nameCombined'], ordered=True))
278
+
279
+ # Reorganize columns
280
+ column_order = ['nameTripType', 'nameCombined'] + [col for col in df_lu_stream_sorted.columns if col not in ['nameTripType', 'nameCombined']]
281
+
282
+ # Create a new DataFrame with the desired column order
283
+ df_lu_stream_reordered = df_lu_stream_sorted[column_order]
284
+
285
+ df_lu_stream_reordered_t = df_lu_stream_reordered.T
286
+
287
+ #--
288
+ df_lu_stream_with_index = df_lu_stream_reordered_t.reset_index(drop=False).rename(columns={'index': 'ids'})
289
+ df_lu_stream_with_index.index = df_lu_stream_reordered_t.index
290
+
291
+ df_lu_num_t_index = df_lu_stream_with_index.iloc[3:]
292
+
293
+ df_distances_aligned_index = df_distances_aligned.reset_index(drop=False).rename(columns={'index': 'ids'})
294
+ df_distances_aligned_index.index = df_distances_aligned.index
295
+
296
+ df_lu_namesCombined = df_lu_stream_with_index.loc["nameCombined"].iloc[1:]
297
+
298
+ # Sort df_lu_stream_with_index based on the 'ids' column in df_distances_aligned_index
299
+ df_lu_stream_sorted = df_lu_stream_with_index.sort_values(by=['ids'], key=lambda x: pd.Categorical(x, categories=df_distances_aligned_index['ids'], ordered=True))
300
+
301
+
302
+ df_lu_num = df_lu_stream_sorted.T.iloc[1:, :-3]
303
+ df_lu_num.index = df_lu_namesCombined
304
+
305
+ df_distMatrix_speckle = df_distances_aligned
306
+
307
+ df_attraction_num = df_attraction.reset_index().iloc[:-1, 6:]
308
+
309
+ # =============================================================================
310
+ # TRIP GENERATION
311
+
312
+ # ATTRACTION & PRODUCTION ======================================================
313
+ """
314
+ INPUTS
315
+ df_attraction_num
316
+ df_lu_num
317
+ df_production
318
+ df_lu
319
+ df_production_transposed
320
+ """
321
+
322
+ df_attraction_proNode_sum_total = attraction_proNode_full_iter(df_attraction_num, df_lu_num, True)
323
+
324
+ #Get the sqmProPerson
325
+ df_sqmProPerson = df_production.iloc[0, 4:].reset_index()[3]
326
+
327
+ #Get the trip rate
328
+ df_tripRate = copy.deepcopy(df_production)  # a deep copy ensures df_tripRate doesn't point to df_production
329
+ df_tripRate.index = df_tripRate.iloc[:, 0] #Set the row names
330
+ df_tripRate = df_tripRate.iloc[1:, 2]
331
+
332
+ #Numerical df from production ==============================================
333
+ df_production_num = df_production.iloc[1:, 4:]
334
+ df_production_transposed1 = df_production_num.T
335
+
336
+ df_total_trips_allNodes = production_proNode_total(df_lu,
337
+ df_sqmProPerson,
338
+ df_tripRate,
339
+ df_production_num,
340
+ df_production_transposed,
341
+ df_lu_num, printSteps=False)
342
+ # Convert data types to float
343
+ df_total_trips_allNodes = df_total_trips_allNodes.astype(float)
344
+ df_tripRate = df_tripRate.astype(float)
345
+
346
+ df_total_trips_allNodes_sumPerson = df_total_trips_allNodes.div(df_tripRate, axis=0).sum()
347
+ df_total_trips_allNodes_sumPerson_proCat = df_total_trips_allNodes.div(df_tripRate, axis=0)
348
+ df_total_trips_allNodes_sumPerson_proCat_t = df_total_trips_allNodes_sumPerson_proCat.T
349
+ df_total_trips_allNodes_sumPerson_proCat_t_sum = df_total_trips_allNodes_sumPerson_proCat_t.sum()
350
+
351
+ # get total population
352
+ total_population = df_total_trips_allNodes_sumPerson_proCat_t_sum["Tot_Res"] + df_total_trips_allNodes_sumPerson_proCat_t_sum["Tot_tou"]
353
+
354
+
355
+
356
+ # =============================================================================
357
+ distance_matrices = extract_distance_matrices(stream_distance_matrice, distance_matrices_of_interest)
358
+ metric_matrices_ = extract_distance_matrices(stream_metric_matrice, metric_matrices_of_interest)
359
+ metric_matrices = { k:v*10 for k, v in metric_matrices_.items()} # scale (speckle issue)
360
+
361
+ logs = computeTrips(
362
+ df_distributionMatrix,
363
+ df_total_trips_allNodes,
364
+ df_distMatrix_speckle,
365
+ df_alphas,
366
+ df_attraction_proNode_sum_total,
367
+ df_distances_aligned,
368
+ TARGET_TRIP_RATE,
369
+ SCALING_FACTOR,
370
+ total_population,
371
+ df_total_trips_allNodes_sumPerson_proCat_t_sum["Tot_Res"],
372
+ df_total_trips_allNodes_sumPerson_proCat_t_sum["Tot_tou"],
373
+ distance_matrices,
374
+ metric_matrices,
375
+ redistributeTrips,
376
+ DISTANCE_BRACKETS,
377
+ ALPHA_LOW, ALPHA_MED, ALPHA_HIGH, ALPHA, ALPHA_UNIFORM, F_VALUES_MANUAL,
378
+ CLIENT,
379
+ STREAM_ID,
380
+ TARGET_BRANCH_TM,
381
+ sourceCommits
382
+ )
383
+
384
+ print(logs)
385
+
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ pandas==1.3.3
+ numpy==1.21.2
+ numba==0.54.1
+ gradio
+ specklepy
+ requests
+ openpyxl
tripGenerationFunc.py ADDED
@@ -0,0 +1,910 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ from numba import jit
4
+ import math
5
+ import json
6
+ import os
7
+ import sys
8
+ from specklepy.api.client import SpeckleClient
9
+ from specklepy.api.credentials import get_default_account, get_local_accounts
10
+ from specklepy.transports.server import ServerTransport
11
+ from specklepy.api import operations
12
+ from specklepy.objects.geometry import Polyline, Point
13
+ from specklepy.objects import Base
14
+ from specklepy.api import operations, models
15
+ from specklepy.transports.server import ServerTransport
16
+ import time
17
+ from functools import wraps
18
+
19
+
20
+ import speckle_utils
21
+ import data_utils
22
+
23
+
24
+
25
+
26
+ # !!! lots of hard coded values in computeTrips !!!
27
+
28
+ # UTILS
29
+ def reconstruct_dataframe(alpha_low, alpha_med, alpha_high, original_df):
30
+ # Define the mapping from original values to new alpha parameters
31
+ value_to_alpha = {
32
+ 0.00191: alpha_low,
33
+ 0.00767: alpha_high,
34
+ 0.0038: alpha_med
35
+ }
36
+
37
+ # Check if each value is present at least once in the DataFrame
38
+ for original_value in value_to_alpha.keys():
39
+ if not (original_df == original_value).any().any():
40
+ raise ValueError(f"Value {original_value} not found in the input DataFrame.")
41
+
42
+ # Create a new DataFrame based on the original one
43
+ new_df = original_df.copy()
44
+
45
+ # Apply the mapping to each element in the DataFrame
46
+ for original_value, new_value in value_to_alpha.items():
47
+ new_df = new_df.replace(original_value, new_value)
48
+
49
+ return new_df
50
+
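A toy sanity check of reconstruct_dataframe() (the three source constants are the ones hard-coded above; the alpha values passed in are illustrative):

import pandas as pd

df = pd.DataFrame({"a": [0.00191, 0.0038], "b": [0.00767, 0.00191]})
# 0.00191 -> alpha_low, 0.0038 -> alpha_med, 0.00767 -> alpha_high
print(reconstruct_dataframe(0.0023, 0.0038, 0.0076, df))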
51
+ def preprocess_dataFrame(df, headerRow_idx=0, numRowsStart_idx = None, numRowsEnd_idx=None, numColsStart_idx=None, numColsEnd_idx=None, rowNames_idx=None):
52
+ df.columns = df.iloc[headerRow_idx] #Set the header
53
+ if rowNames_idx is not None:
54
+ df.index = df.iloc[:, rowNames_idx] #Set the row names
55
+ df = df.iloc[numRowsStart_idx : numRowsEnd_idx, numColsStart_idx:numColsEnd_idx] #Slice the dataset to numerical data
56
+ return df
57
+
58
+
59
+
60
+
61
+ def timeit(f):
62
+ def timed(*args, **kw):
63
+ ts = time.time()
64
+ result = f(*args, **kw)
65
+ te = time.time()
66
+ print ('func:%r took: %2.4f sec' % \
67
+ (f.__name__, te-ts))
68
+ #(f.__name__, args, kw, te-ts))
69
+ return result
70
+ return timed
71
+
72
+
73
+
74
+
75
+
76
+ def timing_decorator(func):
77
+ @wraps(func)
78
+ def wrapper(*args, **kwargs):
79
+ start_time = time.time()
80
+ result = func(*args, **kwargs)
81
+ end_time = time.time()
82
+
83
+ duration = end_time - start_time
84
+ timestamp = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
85
+
86
+ print(f"{func.__name__} took {duration:.4f} seconds. Finished at {timestamp}")
87
+ return result
88
+
89
+ return wrapper
90
+
91
+
92
+ # Function to compare two dataframes after converting and rounding
93
+ def compare_dataframes(df1, df2, decimals=8):
94
+ # Function to convert DataFrame columns to float and then round
95
+ def convert_and_round_dataframe(df, decimals):
96
+ # Convert all columns to float
97
+ df_float = df.astype(float)
98
+ # Round to the specified number of decimals
99
+ return df_float.round(decimals)
100
+
101
+ rounded_df1 = convert_and_round_dataframe(df1, decimals)
102
+ rounded_df2 = convert_and_round_dataframe(df2, decimals)
103
+
104
+ are_equal = rounded_df1.equals(rounded_df2)
105
+
106
+ print("Both methods are equal:", are_equal)
107
+
108
+ print("Numba shape:", df2.shape)
109
+ print("Original shape:", df1.shape)
110
+
111
+ print("======== ORIGINAL OUTPUT (first item in output list, head() for the first 5 columns)")
112
+ print(df1.iloc[0:5].head(2))
113
+
114
+ print("======== New method OUTPUT (first item in output list, head() for the first 5 columns)")
115
+ print(df2.iloc[0:5].head(2))
116
+
117
+
118
+ def align_dataframes(df1, df2, key):
119
+ """
120
+ Align two dataframes based on a common key, ensuring that both dataframes
121
+ have only the rows with matching keys.
122
+
123
+ Parameters:
124
+ - df1: First dataframe.
125
+ - df2: Second dataframe.
126
+ - key: Column name to align dataframes on.
127
+
128
+ Returns:
129
+ - df1_aligned, df2_aligned: Tuple of aligned dataframes.
130
+ """
131
+ common_ids = df1.index.intersection(df2[key])
132
+ df1_aligned = df1.loc[common_ids]
133
+ df2_aligned = df2[df2[key].isin(common_ids)].set_index(key, drop=False)
134
+ return df1_aligned, df2_aligned
135
+
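A toy illustration of align_dataframes() (data is illustrative): only rows whose ids appear in both frames are kept, and both outputs end up indexed by those shared ids.

import pandas as pd

dist = pd.DataFrame([[0, 5], [5, 0]], index=["a", "b"], columns=["a", "b"])
lu = pd.DataFrame({"ids": ["b", "c"], "lu+park": [1, 2]})
d_aligned, lu_aligned = align_dataframes(dist, lu, "ids")
print(d_aligned.shape, lu_aligned.shape)  # both reduced to the shared id "b"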
136
+
137
+
138
+ #==================================================================================================
139
+
140
+ def attraction_proNode(df_attraction_num, df_lu, df_lu_anName=None, printSteps=False):
141
+ #lu_proNode
142
+ df_lu_proNode = df_lu.reset_index()[df_lu_anName]
143
+ if printSteps:
144
+ print(df_lu_proNode.shape)
145
+ df_lu_proNode.head(50)
146
+
147
+ #attraction_proNode
148
+ if printSteps:
149
+ print("df_attraction_num:", df_attraction_num.shape)
150
+ print("df_lu_proNode:", df_lu_proNode.shape)
151
+ df_attraction_proNode = df_attraction_num.mul(df_lu_proNode, axis=0)
152
+ if printSteps:
153
+ print("df_attraction_proNode:", df_attraction_proNode.shape)
154
+ df_attraction_proNode.head(100)
155
+
156
+ # Sum the values of each column
157
+ df_attraction_proNode_sum = pd.DataFrame(df_attraction_proNode.sum(), columns=['Sum'])
158
+ if printSteps:
159
+ print("df_attraction_proNode_sum:", df_attraction_proNode_sum.shape)
160
+ df_attraction_proNode_sum.head(100)
161
+
162
+ return df_attraction_proNode_sum
163
+
164
+ #Non vectorized iterative function
165
+ def attraction_proNode_full_iter(df_attraction_num, df_lu_num, printSteps=False):
166
+
167
+ # Initialize an empty DataFrame
168
+ df_attraction_proNode_sum_total = pd.DataFrame()
169
+
170
+ for column_name, column_data in df_lu_num.items():
171
+ df_attraction_proNode_sum = attraction_proNode(df_attraction_num, df_lu_num, df_lu_anName=column_name)
172
+
173
+ # Concatenate DataFrames along columns
174
+ df_attraction_proNode_sum_total = pd.concat([df_attraction_proNode_sum_total, df_attraction_proNode_sum], axis=1)
175
+
176
+ # Rename columns in df_distBasedAttr_step2 with the same column names as in df_distributionMatrix_step1
177
+ df_attraction_proNode_sum_total.columns = df_lu_num.columns
178
+
179
+ return df_attraction_proNode_sum_total
180
+
181
+
182
+
183
+ # PRODUCTION ================================================
184
+
185
+ def production_proNode(df_lu, df_sqmProPerson, df_tripRate, df_production_num, df_production_transposed, printSteps=False, df_lu_anName=None):
186
+
187
+ #lu_proNode - reset index
188
+ df_lu_proNode = df_lu.reset_index()[df_lu_anName]
189
+ if printSteps:
190
+ print(df_lu_proNode.shape)
191
+ df_lu_proNode.head(50)
192
+
193
+ #Get the person count - Divide corresponding values of one DataFrame by another
194
+ df_personCount = df_lu_proNode.div(df_sqmProPerson)
195
+ if printSteps:
196
+ print(df_personCount.shape)
197
+ print(df_personCount)
198
+
199
+ # Ensure the index is unique in df_personCount
200
+ df_personCount = df_personCount.reset_index(drop=True)
201
+ df_production_transposed = df_production_transposed.reset_index(drop=True)
202
+ if printSteps:
203
+ df_production_transposed.head()
204
+
205
+ if printSteps:
206
+ df_personCount.head()
207
+ df_tripRate.head()
208
+
209
+
210
+ #Calculate trip production pro node
211
+
212
+ df_production_proNode = df_production_transposed
213
+ df_production_proNode = df_production_proNode.mul(df_personCount, axis=0)
214
+
215
+
216
+ df_production_proNode = df_production_proNode.T
217
+ df_production_proNode = df_production_proNode.mul(df_tripRate, axis=0)
218
+
219
+
220
+ #Total trips
221
+ df_production_proNode_rowSum = df_production_proNode.sum(axis=1)
222
+ df_total_trips = df_production_proNode_rowSum
223
+ #if printSteps:
224
+ #df_total_trips.head(50)
225
+
226
+ return df_total_trips
227
+
228
+ #Non vectorized iterative function
229
+ def production_proNode_total(df_lu, df_sqmProPerson, df_tripRate, df_production_num, df_production_transposed, df_lu_num, printSteps=False):
230
+
231
+ # Initialize an empty DataFrame
232
+ df_total_trips_allNodes = pd.DataFrame()
233
+
234
+ for column_name, column_data in df_lu_num.items():
235
+ df_total_trips_proNode = production_proNode(df_lu_num, df_sqmProPerson, df_tripRate, df_production_num, df_production_transposed, printSteps=False, df_lu_anName=column_name)
236
+
237
+ # Concatenate DataFrames along columns
238
+ df_total_trips_allNodes = pd.concat([df_total_trips_allNodes, df_total_trips_proNode], axis=1)
239
+
240
+ # Rename the columns to match the land-use columns in df_lu_num
241
+ df_total_trips_allNodes.columns = df_lu_num.columns
242
+
243
+ return df_total_trips_allNodes
244
+
245
+
246
+ #df_total_trips_allNodes = production_proNode_total(df_lu, df_sqmProPerson, df_tripRate, df_production_num, df_production_transposed, df_lu_num, printSteps=False)
247
+ #df_total_trips_allNodes.head(50)
248
+
249
+ #==================================================================================================
250
+
251
+ #STEP 1
252
+ def step_1(df_distributionMatrix, df_total_trips_allNodes):
253
+ l = []
254
+ #counter=0
255
+ for column_name, column_data in df_total_trips_allNodes.items():
256
+ df_distributionMatrix_step1_proNode = df_distributionMatrix.mul(column_data, axis = 0)
257
+ l.append(df_distributionMatrix_step1_proNode)
258
+
259
+ return l
260
+
261
+ #STEP 2
262
+ def step_2_vectorized(df_distMatrix, df_alphas):
263
+ # Convert df_distMatrix to a 2D array: Shape (1464, 1464)
264
+ distMatrix_array = df_distMatrix.values
265
+
266
+ # Convert df_alphas to a 1D array: Shape (26,)
267
+ alphas_array = df_alphas.values
268
+
269
+ # Initialize an empty array to store results: Shape (1464, 1464, 26)
270
+ result_3d = np.zeros((distMatrix_array.shape[0], distMatrix_array.shape[1], len(alphas_array)))
271
+
272
+ # Loop over alphas and perform element-wise multiplication followed by exponential function
273
+ for i in range(len(alphas_array)):
274
+ result_3d[:, :, i] = np.exp(-distMatrix_array * alphas_array[i])
275
+
276
+ # Construct the final list of DataFrames
277
+ final_list = [pd.DataFrame(result_3d[i, :, :], columns=df_alphas.index, index=df_distMatrix.index) for i in range(result_3d.shape[0])]
278
+
279
+ return final_list
280
+
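The deterrence weight step_2_vectorized applies per origin-destination pair is exp(-alpha * distance). A quick numeric check (distance value illustrative; alpha here equals ALPHA_MED from config.json):

import numpy as np

print(np.exp(-0.0038 * 800))   # ≈ 0.048: an 800-unit trip keeps roughly 5% of its weight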
281
+ # Step 3
282
+ @jit(nopython=True)
283
+ def multiply_and_sum(arr, attraction_arr):
284
+ # Element-wise multiplication
285
+ multiplied_arr = arr * attraction_arr
286
+ # Sum the values of each column
287
+ summed_arr = multiplied_arr.sum(axis=0)
288
+ return multiplied_arr, summed_arr
289
+
290
+ def step_3_numba(df_attraction_proNode_sum_total, df_step_2):
291
+ # Convert df_attraction_proNode_sum_total to a NumPy array and transpose it
292
+ attraction_array = df_attraction_proNode_sum_total.values.T.astype(np.float64) # Ensure float64 dtype
293
+
294
+ multiplied_results = []
295
+ summed_results = []
296
+
297
+ for df in df_step_2:
298
+ # Convert DataFrame to NumPy array with float64 dtype
299
+ df_array = df.values.astype(np.float64)
300
+
301
+ # Perform element-wise multiplication and summing
302
+ multiplied_arr, summed_arr = multiply_and_sum(df_array, attraction_array)
303
+
304
+ # Convert results back to DataFrames
305
+ df_multiplied = pd.DataFrame(multiplied_arr, columns=df.columns, index=df.index)
306
+
307
+ # Reshape summed_arr to have shape (26,1) and then convert to DataFrame
308
+ df_summed = pd.DataFrame(summed_arr.reshape(-1, 1), index=df.columns, columns=['Sum'])
309
+
310
+ multiplied_results.append(df_multiplied)
311
+ summed_results.append(df_summed)
312
+
313
+ return multiplied_results, summed_results
314
+
315
+
316
+ # step 4
317
+ @jit(nopython=True)
318
+ def divide_and_sum(arr, divisor_arr):
319
+ # Ensure divisor_arr is broadcastable to arr's shape
320
+ divisor_arr_expanded = divisor_arr.reshape((divisor_arr.shape[0], 1, divisor_arr.shape[1]))
321
+
322
+ # Initialize arrays to store results
323
+ divided_result = np.zeros_like(arr)
324
+ summed_result = np.zeros((arr.shape[0], arr.shape[2]))
325
+
326
+ for i in range(arr.shape[0]):
327
+ for j in range(arr.shape[1]):
328
+ for k in range(arr.shape[2]):
329
+ if divisor_arr_expanded[i, 0, k] != 0:
330
+ divided_result[i, j, k] = arr[i, j, k] / divisor_arr_expanded[i, 0, k]
331
+ summed_result[i, k] += divided_result[i, j, k]
332
+
333
+ return divided_result, summed_result
334
+
335
+ def step_4_numba(distAndAreaBasedAttr_step3, distAndAreaBasedAttr_step3_sum):
336
+ # Convert lists of DataFrames to 3D arrays with dtype float64
337
+ array_step3 = np.array([df.values for df in distAndAreaBasedAttr_step3]).astype(np.float64)
338
+ array_step3_sum = np.array([df.values for df in distAndAreaBasedAttr_step3_sum]).astype(np.float64)
339
+
340
+ # Perform division and summation using Numba
341
+ divided_result, summed_result = divide_and_sum(array_step3, array_step3_sum)
342
+
343
+ # Convert results back to lists of DataFrames
344
+ df_distAndAreaBasedAttr_step4 = [pd.DataFrame(divided_result[i], columns=distAndAreaBasedAttr_step3[0].columns, index=distAndAreaBasedAttr_step3[0].index) for i in range(divided_result.shape[0])]
345
+
346
+ # Correct the creation of the summed DataFrame to avoid setting the 'Sum' index
347
+ df_distAndAreaBasedAttr_step4_sum = [pd.DataFrame(summed_result[i]).T.set_axis(['Sum'], axis='index').set_axis(distAndAreaBasedAttr_step3[0].columns, axis='columns') for i in range(summed_result.shape[0])]
348
+
349
+ return df_distAndAreaBasedAttr_step4, df_distAndAreaBasedAttr_step4_sum
350
+
351
+ # step 5
352
+ @jit(nopython=True)
353
+ def tripsPerArctivity_numba(matrix, attrs):
354
+ rows, cols = attrs.shape[0], matrix.shape[0] # 1464, 26
355
+ result = np.zeros((cols, rows), dtype=np.float64) # Prepare result matrix (26, 1464)
356
+
357
+ for i in range(rows): # Iterate over each area
358
+ for j in range(cols): # Iterate over each land use category
359
+ sum_val = 0.0
360
+ for k in range(cols): # Iterate over each element in the distribution matrix row
361
+ sum_val += matrix[j, k] * attrs[i, k]
362
+ result[j, i] = sum_val
363
+
364
+ return result
365
+
366
+ def step_5_numba(distributionMatrix_step1, distAndAreaBasedAttr_step4):
367
+ sums = []
368
+ count = 0
369
+ total_count = len(distributionMatrix_step1)
370
+
371
+ for df_distributionMatrix_step1, df_distAndAreaBasedAttr_step4 in zip(distributionMatrix_step1, distAndAreaBasedAttr_step4):
372
+ # Convert DataFrames to NumPy arrays with dtype float64
373
+ matrix = df_distributionMatrix_step1.values.astype(np.float64)
374
+ attrs = df_distAndAreaBasedAttr_step4.values.astype(np.float64)
375
+
376
+ result = tripsPerArctivity_numba(matrix, attrs)
377
+ df_result = pd.DataFrame(result, index=df_distributionMatrix_step1.columns, columns=df_distAndAreaBasedAttr_step4.index)
378
+
379
+ sums.append(df_result)
380
+
381
+ count += 1
382
+ #print(f"Iteration {count} out of {total_count} is finished.")
383
+ #print("---------")
384
+
385
+ return sums
386
+
387
+
388
+ # step 6&7
389
+ def step_6_7_vectorized(df_trips_proNode_proActivity_total):
390
+ # Convert each DataFrame to a NumPy array and stack them to form a 3D array
391
+ array_3d = np.array([df.values for df in df_trips_proNode_proActivity_total])
392
+
393
+ # Sum across the middle axis (columns of each DataFrame)
394
+ summed_array = array_3d.sum(axis=1)
395
+
396
+ # Convert the summed array back to a DataFrame
397
+ final_matrix = pd.DataFrame(summed_array, index=df_trips_proNode_proActivity_total[0].columns, columns=df_trips_proNode_proActivity_total[0].columns)
398
+
399
+ return final_matrix
400
+
401
+
402
+ # step 8
403
+
404
+ def adjTripRate_adjFactor(tripMatrix,df_total_trips_allNodes_sumPerson, targetRate=1, factor=1 ):
405
+ df_tripMatrix_total_sum = tripMatrix.sum().sum()
406
+ df_total_trips_allNodes_sumPerson_total = df_total_trips_allNodes_sumPerson.sum()
407
+
408
+ # scale to target trip rate
409
+ tripRateBeforeAdjustment = df_tripMatrix_total_sum/df_total_trips_allNodes_sumPerson_total
410
+ print("tripRateBeforeAdjustment",tripRateBeforeAdjustment)
411
+ adjustmentRate = targetRate/tripRateBeforeAdjustment
412
+ print("adjustmentRate",adjustmentRate)
413
+
414
+ # scale by ... scale factor (outdated, was hardcoded )
415
+ df_tripMatrix_adjusted = tripMatrix * adjustmentRate
416
+ #df_tripMatrix_adjusted_scaled = df_tripMatrix_adjusted.div(factor)
417
+ return df_tripMatrix_adjusted, df_tripMatrix_adjusted # df_tripMatrix_adjusted_scaled
418
+
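A minimal numeric sketch of the adjustment in adjTripRate_adjFactor() (toy numbers): if the raw matrix yields 2.0 trips per person and the target rate is 3.2, every cell is scaled by 1.6.

import pandas as pd

tm = pd.DataFrame([[10.0, 10.0], [10.0, 10.0]])   # 40 raw trips
persons = pd.Series([10.0, 10.0])                  # 20 persons -> rate 2.0
adjusted, _ = adjTripRate_adjFactor(tm, persons, targetRate=3.2)
print(adjusted.sum().sum() / persons.sum())        # -> 3.2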
419
+ # Uniform Matrix
420
+ def decay(d, alpha):
421
+ return math.exp(d * alpha * -1)
422
+
423
+ def distanceDecay(df, alpha):
424
+ return df.applymap(lambda x: decay(x, alpha))
425
+
426
+ def matrix_reduce_add(df):
427
+ return df[df != sys.float_info.max].sum().sum()
428
+
429
+ def replace_maxValue(df):
430
+ return df.replace(sys.float_info.max, 0)
431
+
432
+
433
+ #Trip gen matrix is used to scale the distance matrix
434
+ def getUniformMatrix(distanceMatrix, tripGenMatrix, alpha):
435
+
436
+ distanceMatrix_withDecay = distanceDecay(distanceMatrix, alpha)
437
+ distanceMatrix_sum = matrix_reduce_add(distanceMatrix_withDecay)
438
+ tripGenMatrix_sum = matrix_reduce_add(tripGenMatrix)
439
+ ratio = distanceMatrix_sum / tripGenMatrix_sum
440
+
441
+ uniformMatrix = distanceMatrix_withDecay.div(ratio)
442
+
443
+ return replace_maxValue(uniformMatrix)
444
+
445
+
446
+ #==================================================================================================
447
+ #Modal Split functions
448
+
449
+ def computeModalShare(trip_matrix, dist_matrices, alpha, f_values=None):
450
+ """
451
+ Process matrices or DataFrames with exponentiation and normalization.
452
+
453
+ Args:
454
+ trip_matrix (np.ndarray or pd.DataFrame): The trip matrix.
455
+ dist_matrices (dict of np.ndarray or pd.DataFrame): Dictionary of distance matrices.
456
+ alpha (float): The alpha coefficient.
457
+ f_values (dict of float, optional): Dictionary of f coefficients for each matrix. If None, defaults to 0 for each matrix.
458
+
459
+ Returns:
460
+ dict: Normalized matrices.
461
+ """
462
+
463
+ # Default f_values to 0 for each key in dist_matrices if not provided
464
+ if not f_values:
465
+ f_values = {key: 0 for key in dist_matrices.keys()}
466
+
467
+ exp_matrices = {}
468
+ for key, matrix in dist_matrices.items():
469
+ f = f_values.get(key, 0)
470
+
471
+ # Convert DataFrame to numpy array if needed
472
+ if isinstance(matrix, pd.DataFrame):
473
+ matrix = matrix.values
474
+
475
+ exp_matrix = np.exp(-1 * (matrix * alpha + f))
476
+ exp_matrices[key] = exp_matrix
477
+
478
+ # Calculate the sum of all exponentials
479
+ sum_exp = sum(exp_matrices.values())
480
+
481
+ # Normalize each matrix & multiply by trip matrix and update the matrices
482
+ normalized_matrices = {key: (exp_matrix / sum_exp) * trip_matrix for key, exp_matrix in exp_matrices.items()}
483
+
484
+ return normalized_matrices
485
+
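Per OD pair, computeModalShare() assigns mode m the share exp(-(alpha*d_m + f_m)) / sum_k exp(-(alpha*d_k + f_k)), then multiplies by the trip matrix. A toy two-mode example (all numbers illustrative):

import numpy as np

trip_matrix = np.array([[100.0]])
dist = {"walk": np.array([[12.0]]), "drt": np.array([[6.0]])}
shares = computeModalShare(trip_matrix, dist, alpha=0.1, f_values={"walk": 0.0, "drt": 0.5})
print({k: round(float(v[0, 0]), 1) for k, v in shares.items()})  # ~47.5 walk / 52.5 drt trips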
486
+
487
+ def redistributeModalShares(dist_matrices, trip_matrices, redistribution_rules, threshold=0.5):
488
+ """
489
+ Redistribute trips among mobility networks based on given redistribution rules and when travel times are within a specified threshold.
490
+
491
+ Args:
492
+ dist_matrices (dict): Dictionary of distance matrices (travel times) for different mobility networks, keyed by identifier.
493
+ trip_matrices (dict): Dictionary of matrices representing the number of trips for each mobility network, keyed by identifier.
494
+ redistribution_rules (list): List of redistribution rules with "from" and "to" network identifiers.
495
+ threshold (float): The threshold for considering travel times as similar.
496
+
497
+ Returns:
498
+ dict: Updated dictionary of trip matrices with transferred trips.
499
+ """
500
+
501
+ # Verify that all specified matrices exist in the input dictionaries
502
+ for rule in redistribution_rules:
503
+ if rule["from"] not in dist_matrices or rule["from"] not in trip_matrices:
504
+ raise ValueError(f"Matrix ID {rule['from']} not found in the inputs.")
505
+ for to_id in rule["to"]:
506
+ if to_id not in dist_matrices or to_id not in trip_matrices:
507
+ raise ValueError(f"Matrix ID {to_id} not found in the inputs.")
508
+
509
+ # Copy the trip_matrices to avoid modifying the input directly
510
+ updated_trip_matrices = {k: v.copy() for k, v in trip_matrices.items()}
511
+
512
+ # Redistribute trips based on the rules and the threshold
513
+ for rule in redistribution_rules:
514
+ from_matrix_id = rule["from"]
515
+ from_matrix_trips = updated_trip_matrices[from_matrix_id]
516
+ from_matrix_dist = dist_matrices[from_matrix_id]
517
+
518
+ for to_matrix_id in rule["to"]:
519
+ to_matrix_dist = dist_matrices[to_matrix_id]
520
+
521
+ # Create a boolean array where the absolute difference in travel times is less than or equal to the threshold
522
+ similar_travel_time = np.abs(from_matrix_dist - to_matrix_dist) <= threshold
523
+
524
+ # Find the indices where there are trips to transfer under the new condition
525
+ indices_to_transfer = similar_travel_time & (from_matrix_trips > 0)
526
+
527
+ # Transfer trips where the condition is True
528
+ updated_trip_matrices[to_matrix_id][indices_to_transfer] += from_matrix_trips[indices_to_transfer]
529
+
530
+ # Zero out the transferred trips in the from_matrix
531
+ from_matrix_trips[indices_to_transfer] = 0
532
+
533
+ # Return the updated trip matrices dictionary
534
+ return updated_trip_matrices
535
+
536
+
537
+
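A small worked example of redistributeModalShares with hypothetical 1x2 matrices; the two modes' travel times coincide (within the 0.5 threshold) only on the first OD pair:

import numpy as np

dist = {"fast": np.array([[5.0, 9.0]]), "slow": np.array([[5.2, 20.0]])}
trips = {"fast": np.array([[10.0, 10.0]]), "slow": np.array([[0.0, 0.0]])}
rules = [{"from": "fast", "to": ["slow"]}]

out = redistributeModalShares(dist, trips, rules, threshold=0.5)
# |5.0 - 5.2| <= 0.5, so the 10 trips on the first OD pair move from "fast" to "slow";
# the second pair (|9 - 20| > 0.5) is left untouched.
print(out["fast"], out["slow"])  # [[ 0. 10.]] [[10.  0.]]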
538
+ def computeDistanceBrackets(trip_matrices, metric_dist_matrices, dist_brackets=[800, 2400, 4800]):
539
+ # Transform the keys of metric_dist_matrices to match with trip_matrices
540
+ transformed_metric_keys = {key.replace("metric_matrix", "distance_matrix")+"_noEntr": matrix
541
+ for key, matrix in metric_dist_matrices.items()}
542
+
543
+ # Initialize dictionary to store aggregated trips per distance bracket
544
+ bracket_totals = {bracket: 0 for bracket in dist_brackets}
545
+
546
+ # Iterate over each pair of trip matrix and distance matrix
547
+ for key, trip_matrix in trip_matrices.items():
548
+ # Find the corresponding distance matrix
549
+ dist_matrix = transformed_metric_keys.get(key)
550
+ if dist_matrix is None:
551
+ print("no matrxi found")
552
+ continue # Skip if no corresponding distance matrix found
553
+
554
+ # Calculate trips for each distance bracket
555
+ for i, bracket in enumerate(dist_brackets):
556
+ if i == 0:
557
+ # For the first bracket, count trips with distance <= bracket
558
+ bracket_totals[bracket] += (trip_matrix[dist_matrix <= bracket]).sum().sum()
559
+ else:
560
+ # For subsequent brackets, count trips within the bracket range
561
+ prev_bracket = dist_brackets[i - 1]
562
+ bracket_totals[bracket] += (trip_matrix[(dist_matrix > prev_bracket) & (dist_matrix <= bracket)]).sum().sum()
563
+ brackets_sum = sum(bracket_totals.values())
564
+ brackets_rel = {str(bracket): round(total / brackets_sum, 3) for bracket, total in bracket_totals.items()}
565
+ return brackets_rel
566
+
567
+
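A toy run of computeDistanceBrackets; the key names follow the metric_matrix -> distance_matrix + "_noEntr" renaming applied above, and all values are hypothetical:

import pandas as pd

trips = {"activity_node+distance_matrix_ped_noEntr": pd.DataFrame([[5.0, 5.0, 5.0]])}
metric = {"activity_node+metric_matrix_ped": pd.DataFrame([[500.0, 1500.0, 3000.0]])}

# One bundle of 5 trips falls into each bracket: <=800, 800-2400, 2400-4800
print(computeDistanceBrackets(trips, metric, dist_brackets=[800, 2400, 4800]))
# -> {'800': 0.333, '2400': 0.333, '4800': 0.333}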
568
+ def computeTripStats(trip_matrices, distance_matrices, metric_dist_matrices, pop):
569
+ # Transform the keys of metric_dist_matrices to match with trip_matrices
570
+ transformed_metric_keys = {key.replace("metric_matrix", "distance_matrix")+"_noEntr": matrix
571
+ for key, matrix in metric_dist_matrices.items()}
572
+
573
+ trips = 0
574
+ totalTravelDistance = 0
575
+ totalTravelTime = 0
576
+ # Iterate over each pair of trip matrix and distance matrix
577
+ for key, trip_matrix in trip_matrices.items():
578
+ # Find the corresponding distance matrix
579
+ metric_dist_matrix = transformed_metric_keys.get(key)
580
+ dist_matrix = distance_matrices.get(key)
581
+ if metric_dist_matrix is None:
582
+ print("no matrxi found")
583
+ continue # Skip if no corresponding distance matrix found
584
+
585
+ # compute
586
+ totalTravelTime += (dist_matrix*trip_matrix).sum().sum()
587
+ trips += trip_matrix.sum().sum()
588
+ totalTravelDistance += (metric_dist_matrix*trip_matrix).sum().sum()
589
+
590
+ MeanTripDistance = totalTravelDistance/trips
591
+ MeanTravelDistancePerPerson = totalTravelDistance/pop
592
+
593
+ MeanTravelTime = totalTravelTime/trips
594
+ MeanTravelTimePerPerson = totalTravelTime/pop
595
+
596
+ return totalTravelDistance, totalTravelTime, MeanTripDistance, MeanTravelDistancePerPerson, MeanTravelTime, MeanTravelTimePerPerson
597
+
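A hand-checkable example of computeTripStats with a single hypothetical mode; units are simply those of the input matrices (travel time vs. metric distance):

import pandas as pd

key = "activity_node+distance_matrix_ped_noEntr"
trips = {key: pd.DataFrame([[2.0, 4.0]])}        # trips per OD pair
times = {key: pd.DataFrame([[10.0, 20.0]])}      # travel-time matrix
metric = {"activity_node+metric_matrix_ped": pd.DataFrame([[800.0, 1200.0]])}  # metric distances

stats = computeTripStats(trips, times, metric, pop=3)
# totalTravelDistance = 2*800 + 4*1200 = 6400, totalTravelTime = 2*10 + 4*20 = 100,
# MeanTripDistance = 6400/6, MeanTravelDistancePerPerson = 6400/3, etc.
print(stats)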
598
+ def calculate_relative_mode_share(trip_matrices):
599
+ """
600
+ Calculate the relative mode share for a dictionary of trip matrices.
601
+
602
+ Args:
603
+ trip_matrices (dict of np.ndarray or pd.DataFrame): Dictionary of trip matrices.
604
+
605
+ Returns:
606
+ dict: Relative mode distribution for each key in trip_matrices.
607
+ """
608
+
609
+ # Compute the total trips for each mode
610
+ total_trips_per_mode = {key: matrix.sum().sum() for key, matrix in trip_matrices.items()}
611
+
612
+ # Compute the total trips across all modes
613
+ total_trips_all_modes = sum(total_trips_per_mode.values())
614
+
615
+ # Calculate the relative mode distribution
616
+ rel_mode_distribution = {key: trips_per_mode / total_trips_all_modes for key, trips_per_mode in total_trips_per_mode.items()}
617
+
618
+ return rel_mode_distribution
619
+
620
+
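calculate_relative_mode_share in isolation, with two hypothetical trip matrices:

import numpy as np

trip_matrices = {"walk": np.array([[30.0, 30.0]]), "drt": np.array([[20.0, 20.0]])}
print(calculate_relative_mode_share(trip_matrices))  # walk -> 0.6, drt -> 0.4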
621
+ def extract_distance_matrices(stream, distance_matrices_of_interest):
622
+ """
623
+ Extract distance matrices from the stream and convert them to pandas DataFrames.
624
+ Args:
625
+ stream (dict): Stream data containing distance matrices.
626
+ distance_matrices_of_interest (list of str): List of keys for the distance matrices of interest.
627
+ Returns:
628
+ dict: A dictionary of pandas DataFrames, where each key is a distance matrix kind.
629
+ """
630
+ distance_matrices = {}
631
+ for distMK in distance_matrices_of_interest:
632
+ for distM in stream["@Data"]['@{0}']:
633
+ #print( distM.__dict__.keys())
634
+ try:
635
+ distMdict = distM.__dict__[distMK]
636
+
637
+ distance_matrix_dict = json.loads(distMdict)
638
+ origin_ids = distance_matrix_dict["origin_uuid"]
639
+ destination_ids = distance_matrix_dict["destination_uuid"]
640
+ distance_matrix = distance_matrix_dict["matrix"]
641
+
642
+ # Convert the distance matrix to a DataFrame
643
+ df_distances = pd.DataFrame(distance_matrix, index=origin_ids, columns=destination_ids)
644
+ distance_matrices[distMK] = df_distances
645
+ except Exception as e:
646
+ pass
647
+ return distance_matrices
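The parsing step inside extract_distance_matrices, shown standalone; the payload shape (origin_uuid / destination_uuid / matrix) is the one the code above expects, the values are made up:

import json
import pandas as pd

payload = json.dumps({
    "origin_uuid": ["node_a", "node_b"],
    "destination_uuid": ["node_a", "node_b"],
    "matrix": [[0.0, 12.5], [12.5, 0.0]],
})

d = json.loads(payload)
df_distances = pd.DataFrame(d["matrix"], index=d["origin_uuid"], columns=d["destination_uuid"])
print(df_distances)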
648
+ #==================================================================================================
649
+
650
+
651
+
652
+ def computeTrips(
653
+ df_distributionMatrix,
654
+ df_total_trips_allNodes,
655
+ df_distMatrix_speckle,
656
+ df_alphas,
657
+ df_attraction_proNode_sum_total,
658
+ df_distances_aligned,
659
+ TARGET_TRIP_RATE,
660
+ SCALING_FACTOR,
661
+ total_population,
662
+ tot_res,
663
+ tot_vis,
664
+
665
+
666
+ distance_matrices,
667
+ metric_matrices,
668
+ redistributeTrips,
669
+ DISTANCE_BRACKETS,
670
+
671
+ alpha_low, alpha_med, alpha_high,
672
+ alpha_mode,
673
+ alpha_uniform,
674
+ NEW_F_VALUES,
675
+
676
+ CLIENT,
677
+ TARGET_STREAM,
678
+ TARGET_BRANCH,
679
+ sourceInfo="",
680
+ ):
681
+
682
+ NEW_ALPHAS = reconstruct_dataframe(alpha_low, alpha_med, alpha_high, df_alphas)
683
+ NEW_MODE_ALPHA = alpha_mode
684
+
685
+
686
+ # ====
687
+ #step 1
688
+ distributionMatrix_step1M = step_1(df_distributionMatrix,
689
+ df_total_trips_allNodes)
690
+
691
+ #step 2
692
+ df_step_2M = step_2_vectorized(df_distMatrix_speckle,
693
+ NEW_ALPHAS)
694
+
695
+
696
+
697
+
698
+ #step 3
699
+ distAndAreaBasedAttr_step3M, distAndAreaBasedAttr_step3_sumM = step_3_numba(df_attraction_proNode_sum_total,
700
+ df_step_2M)
701
+
702
+
703
+ #step 4
704
+ distAndAreaBasedAttr_step4M, distAndAreaBasedAttr_step4_sumM = step_4_numba(distAndAreaBasedAttr_step3M,
705
+ distAndAreaBasedAttr_step3_sumM)
706
+
707
+
708
+
709
+ #step 5
710
+ df_trips_proNode_proActivity_totalM = step_5_numba(distributionMatrix_step1M,
711
+ distAndAreaBasedAttr_step4M)
712
+
713
+ #step 6 & 7
714
+ df_tripMatrixM = step_6_7_vectorized(df_trips_proNode_proActivity_totalM)
715
+
716
+
717
+ #step 8
718
+ df_tripMatrix_adjustedM, df_tripMatrix_adjusted_scaledM = adjTripRate_adjFactor(df_tripMatrixM,
719
+ total_population,
720
+ TARGET_TRIP_RATE,
721
+ SCALING_FACTOR )
722
+ #------
723
+ #MAIN 1 compute trip matrice per mode
724
+ trip_matricesM = computeModalShare(df_tripMatrix_adjusted_scaledM,
725
+ distance_matrices,
726
+ NEW_MODE_ALPHA,
727
+ f_values=NEW_F_VALUES)
728
+
729
+ #MAIN 2 compute modal shares (redistribute trips in case of identical travel time)
730
+ trip_matrices_redisM = redistributeModalShares(distance_matrices,
731
+ trip_matricesM,
732
+ redistributeTrips)
733
+
734
+ #POST 1 compute mode shares
735
+ rel_mode_distributionM = calculate_relative_mode_share(trip_matrices_redisM)
736
+
737
+
738
+ #POST 2 distance brackets
739
+ dist_sharesM = computeDistanceBrackets(trip_matrices_redisM,
740
+ metric_matrices,
741
+ DISTANCE_BRACKETS)
742
+
743
+ #POST 3 compute more stats
744
+ (totalTravelDistance, totalTravelTime,
745
+ MeanTripDistance,MeanTravelDistancePerPerson,
746
+ MeanTripTime, MeanTravelTimePerPerson) = computeTripStats(trip_matrices_redisM,
747
+ distance_matrices,
748
+ metric_matrices,
749
+ total_population)
750
+
751
+
752
+ uniform_tripmatrix = getUniformMatrix(df_distances_aligned, df_tripMatrix_adjustedM, alpha_uniform)
753
+
754
+ #add to dataframe
755
+ # Define your parameter and target values
756
+ newdata = {
757
+ # Model Parameter==
758
+
759
+ # Alpha - Routing
760
+ "alpha_low": alpha_low,
761
+ "alpha_med": alpha_med,
762
+ "alpha_high": alpha_high,
763
+ "alpha_uniform":alpha_uniform,
764
+
765
+ "fvalues":NEW_F_VALUES,
766
+
767
+
768
+ "alpha_mode":NEW_MODE_ALPHA,
769
+
770
+ # Model Indicators ==
771
+
772
+ # Modal Shares
773
+ "share_ped_mm_art": rel_mode_distributionM['activity_node+distance_matrix_ped_mm_art_noEntr'],
774
+ "share_ped_mm": rel_mode_distributionM['activity_node+distance_matrix_ped_mm_noEntr'],
775
+ "share_ped": rel_mode_distributionM['activity_node+distance_matrix_ped_noEntr'],
776
+ "share_ped_art": rel_mode_distributionM['activity_node+distance_matrix_ped_art_noEntr'],
777
+
778
+ # Tripshares by Distance Brackets
779
+ "800": dist_sharesM["800"],
780
+ "2400": dist_sharesM["2400"],
781
+ "4800": dist_sharesM["4800"],
782
+
783
+ # Travel Time & Distances
784
+ "totalTravelDistance":totalTravelDistance,
785
+ "totalTravelTime":totalTravelTime,
786
+ "MeanTravelTimePerPerson":MeanTravelTimePerPerson,
787
+
788
+ # Trip Distances
789
+ "MeanTripDistance":MeanTripDistance,
790
+ "MeanTripTime":MeanTripTime,
791
+ "MeanTravelDistancePerPerson":MeanTravelDistancePerPerson,
792
+
793
+ }
794
+
795
+
796
+
797
+ trip_matrice_adjName = {k.replace("distance", "trip"):v for k, v in trip_matricesM.items()}
798
+ trip_matrice_adjName["tripMatrix_landuse"] = df_tripMatrix_adjusted_scaledM
799
+ trip_matrice_adjName["tripMatrix_uniform"] = uniform_tripmatrix
800
+
801
+ extraData = {"population":total_population,
802
+ "residents":tot_res,
803
+ "visitors":tot_vis,
804
+ "parameter":newdata,
805
+ }
806
+
807
+ commitMsg = "automatic update"
808
+ try:
809
+ commitMsg += " using these commits: #+ "
810
+ for k,v in sourceInfo.items():
811
+ commitMsg += f" {k}: {v}"
812
+ except:
813
+ pass
814
+ print(commitMsg)
815
+
816
+ commit_id = send_matrices_and_create_commit(
817
+ trip_matrice_adjName,
818
+ CLIENT,
819
+ TARGET_STREAM,
820
+ TARGET_BRANCH,
821
+ commitMsg,
822
+ rows_per_chunk=300,
823
+ containerMetadata=extraData
824
+ )
825
+ print ("===============================")
826
+ return newdata
827
+
828
+
829
+ #==================================================================================================
830
+ # speckle send
831
+
832
+ def send_row_bundle(rows, indices, transport):
833
+ bundle_object = Base()
834
+ bundle_object.rows = rows
835
+ bundle_object.indices = indices
836
+ bundle_id = operations.send(base=bundle_object, transports=[transport])
837
+ return bundle_id
838
+
839
+ def send_matrix(matrix_df, transport, rows_per_chunk):
840
+ matrix_object = Base(metaData="Some metadata")
841
+ batch_index = 0 # Maintain a separate counter for batch indexing
842
+
843
+ # Bundle rows together
844
+ rows = []
845
+ indices = []
846
+ for index, row in matrix_df.iterrows():
847
+ rows.append([round(r,4) for r in row.tolist()])
848
+ indices.append(str(index))
849
+ if len(rows) == rows_per_chunk:
850
+ bundle_id = send_row_bundle(rows, indices, transport)
851
+ # Set the reference to the bundle in the matrix object using setattr
852
+ setattr(matrix_object, f"@batch_{batch_index}", {"referencedId": bundle_id})
853
+ rows, indices = [], [] # Reset for the next bundle
854
+ batch_index += 1 # Increment the batch index
855
+ print(str(rows_per_chunk) + " rows have been sent")
856
+
857
+ # Don't forget to send the last bundle if it's not empty
858
+ if rows:
859
+ bundle_id = send_row_bundle(rows, indices, transport)
860
+ setattr(matrix_object, f"@batch_{batch_index}", {"referencedId": bundle_id})
861
+
862
+ # Send the matrix object to Speckle
863
+ matrix_object_id = operations.send(base=matrix_object, transports=[transport])
864
+ return matrix_object_id
865
+
866
+
867
+
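The row-bundling pattern used by send_matrix, sketched without any Speckle transport so the chunking behaviour can be checked on its own (the list of batches stands in for send_row_bundle):

import pandas as pd

df = pd.DataFrame({"a": range(7), "b": range(7)})
rows_per_chunk = 3

rows, indices, batches = [], [], []
for index, row in df.iterrows():
    rows.append([round(r, 4) for r in row.tolist()])
    indices.append(str(index))
    if len(rows) == rows_per_chunk:
        batches.append((indices, rows))  # stand-in for send_row_bundle(rows, indices, transport)
        rows, indices = [], []
if rows:  # trailing partial chunk
    batches.append((indices, rows))
print(len(batches))  # 3 batches of 3 + 3 + 1 rows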
868
+
869
+
870
+ # Main function to send all matrices and create a commit
871
+ def send_matrices_and_create_commit(matrices, client, stream_id, branch_name, commit_message, rows_per_chunk, containerMetadata):
872
+ transport = ServerTransport(client=client, stream_id=stream_id)
873
+ matrix_ids = {}
874
+
875
+ # Send each matrix row by row and store its object ID
876
+ for k, df in matrices.items():
877
+ matrix_ids[k] = send_matrix(df, transport, rows_per_chunk)
878
+ print("object: " + k + " has been sent")
879
+
880
+ # Create a container object that will hold references to all the matrix objects
881
+ container_object = Base()
882
+
883
+ for k, v in containerMetadata.items():
884
+ container_object[k] = v
885
+
886
+ # Assuming you have a way to reference matrix objects by their IDs in Speckle
887
+ for k, obj_id in matrix_ids.items():
888
+ print("obj_id", obj_id)
889
+ container_object[k] = obj_id
890
+
891
+
892
+ # Dynamically add references to the container object
893
+ for matrix_name, matrix_id in matrix_ids.items():
894
+ # This assigns a reference to the matrix object by its ID
895
+ # You might need to adjust this based on how your Speckle server expects to receive references
896
+ setattr(container_object, matrix_name, {"referencedId": matrix_id})
897
+
898
+
899
+
900
+ # Send the container object
901
+ container_id = operations.send(base=container_object, transports=[transport])
902
+
903
+
904
+ # Now use the container_id when creating the commit
905
+ commit_id = client.commit.create(
906
+ stream_id=stream_id,
907
+ object_id=container_id, # Use the container's ID here
908
+ branch_name=branch_name,
909
+ message=commit_message,
910
+ )
+ return commit_id
utils.py ADDED
@@ -0,0 +1,111 @@
1
+
2
+ import time
3
+ from functools import wraps
4
+
5
+ def reconstruct_dataframe(alpha_low, alpha_med, alpha_high, original_df):
6
+ # Define the mapping from original values to new alpha parameters
7
+ value_to_alpha = {
8
+ 0.00191: alpha_low,
9
+ 0.00767: alpha_high,
10
+ 0.0038: alpha_med
11
+ }
12
+
13
+ # Check if each value is present at least once in the DataFrame
14
+ for original_value in value_to_alpha.keys():
15
+ if not (original_df == original_value).any().any():
16
+ raise ValueError(f"Value {original_value} not found in the input DataFrame.")
17
+
18
+ # Create a new DataFrame based on the original one
19
+ new_df = original_df.copy()
20
+
21
+ # Apply the mapping to each element in the DataFrame
22
+ for original_value, new_value in value_to_alpha.items():
23
+ new_df = new_df.replace(original_value, new_value)
24
+
25
+ return new_df
26
+
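Example use of reconstruct_dataframe; the new alphas are arbitrary, while the hard-coded source values (0.00191, 0.0038, 0.00767) must each appear at least once in the input:

import pandas as pd

original_df = pd.DataFrame({"walk": [0.00191, 0.0038], "car": [0.00767, 0.0038]})
new_df = reconstruct_dataframe(alpha_low=0.001, alpha_med=0.004, alpha_high=0.008, original_df=original_df)
print(new_df)
#     walk    car
# 0  0.001  0.008
# 1  0.004  0.004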
27
+ def preprocess_dataFrame(df, headerRow_idx=0, numRowsStart_idx = None, numRowsEnd_idx=None, numColsStart_idx=None, numColsEnd_idx=None, rowNames_idx=None):
28
+ df.columns = df.iloc[headerRow_idx] #Set the header
29
+ if rowNames_idx is not None:
30
+ df.index = df.iloc[:, rowNames_idx] #Set the row names
31
+ df = df.iloc[numRowsStart_idx : numRowsEnd_idx, numColsStart_idx:numColsEnd_idx] #Slice the dataset to numerical data
32
+ return df
33
+
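A small example of preprocess_dataFrame on a raw sheet-like frame whose first row is the header and whose first column holds the row names:

import pandas as pd

raw = pd.DataFrame([
    ["name", "A", "B"],   # header row
    ["node1", 1, 2],
    ["node2", 3, 4],
])

df = preprocess_dataFrame(raw, headerRow_idx=0, numRowsStart_idx=1, numColsStart_idx=1, rowNames_idx=0)
print(df)  # numeric block with node1/node2 as index and A/B as columns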
34
+
35
+
36
+
37
+ def timeit(f):
38
+ def timed(*args, **kw):
39
+ ts = time.time()
40
+ result = f(*args, **kw)
41
+ te = time.time()
42
+ print('func:%r took: %2.4f sec' % \
43
+ (f.__name__, te-ts))
44
+ #(f.__name__, args, kw, te-ts))
45
+ return result
46
+ return timed
47
+
48
+
49
+
50
+
51
+
52
+ def timing_decorator(func):
53
+ @wraps(func)
54
+ def wrapper(*args, **kwargs):
55
+ start_time = time.time()
56
+ result = func(*args, **kwargs)
57
+ end_time = time.time()
58
+
59
+ duration = end_time - start_time
60
+ timestamp = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
61
+
62
+ print(f"{func.__name__} took {duration:.4f} seconds. Finished at {timestamp}")
63
+ return result
64
+
65
+ return wrapper
66
+
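Typical use of timing_decorator (the decorated function is illustrative):

import time

@timing_decorator
def slow_add(a, b):
    time.sleep(0.1)
    return a + b

slow_add(1, 2)  # prints e.g. "slow_add took 0.1002 seconds. Finished at 2024-01-01 12:00:00"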
67
+
68
+ # Function to compare two dataframes after converting and rounding
69
+ def compare_dataframes(df1, df2, decimals=8):
70
+ # Function to convert DataFrame columns to float and then round
71
+ def convert_and_round_dataframe(df, decimals):
72
+ # Convert all columns to float
73
+ df_float = df.astype(float)
74
+ # Round to the specified number of decimals
75
+ return df_float.round(decimals)
76
+
77
+ rounded_df1 = convert_and_round_dataframe(df1, decimals)
78
+ rounded_df2 = convert_and_round_dataframe(df2, decimals)
79
+
80
+ are_equal = rounded_df1.equals(rounded_df2)
81
+
82
+ print("Both methods are equal:", are_equal)
83
+
84
+ print("Numba shape:", df2.shape)
85
+ print("Original shape:", df1.shape)
86
+
87
+ print("======== ORIGINAL OUTPUT (first item in output list, head() for the first 5 columns)")
88
+ print(df1.iloc[0:5].head(2))
89
+
90
+ print("======== New method OUTPUT (first item in output list, head() for the first 5 columns)")
91
+ print(df2.iloc[0:5].head(2))
92
+
93
+
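compare_dataframes on two frames that only differ below the rounding threshold:

import pandas as pd

a = pd.DataFrame({"x": ["1.000000001", "2"]})  # string column, as read from a spreadsheet
b = pd.DataFrame({"x": [1.0, 2.0]})
compare_dataframes(a, b, decimals=6)  # prints "Both methods are equal: True" plus shapes and previews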
94
+ def align_dataframes(df1, df2, key):
95
+ """
96
+ Align two dataframes based on a common key, ensuring that both dataframes
97
+ have only the rows with matching keys.
98
+
99
+ Parameters:
100
+ - df1: First dataframe.
101
+ - df2: Second dataframe.
102
+ - key: Column name to align dataframes on.
103
+
104
+ Returns:
105
+ - df1_aligned, df2_aligned: Tuple of aligned dataframes.
106
+ """
107
+ common_ids = df1.index.intersection(df2[key])
108
+ df1_aligned = df1.loc[common_ids]
109
+ df2_aligned = df2[df2[key].isin(common_ids)].set_index(key, drop=False)
110
+ return df1_aligned, df2_aligned
111
+
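Example of align_dataframes, assuming df1 is indexed by node id and df2 carries the ids in a key column (hypothetically named "uuid" here):

import pandas as pd

df1 = pd.DataFrame({"val": [1, 2, 3]}, index=["a", "b", "c"])
df2 = pd.DataFrame({"uuid": ["b", "c", "d"], "other": [20, 30, 40]})

d1, d2 = align_dataframes(df1, df2, key="uuid")
print(d1.index.tolist())  # ['b', 'c']
print(d2.index.tolist())  # ['b', 'c']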