# Other samples

# Sample 1

import math

def reward_function(params):
    """Combine wheel placement, speed and heading alignment into one reward.

    The result is the sum of three terms, each currently weighted 1:

    * wheels: 1 when ``params['all_wheels_on_track']`` is truthy, else 0;
    * speed: ``params['speed'] / 4``;
    * direction: cosine of the angle between the heading and the segment
      running from ``waypoints[closest_waypoints[0]]`` to
      ``waypoints[closest_waypoints[1]]``; 1 when aligned, -1 when the
      heading points the opposite way.

    Args:
        params: Mapping providing ``all_wheels_on_track``, ``speed``,
            ``waypoints``, ``closest_waypoints`` and ``heading``. The
            heading is compared against an angle in degrees, and each
            waypoint is indexed as an ``(x, y)`` pair.

    Returns:
        The summed reward as a ``float``.

    Raises:
        KeyError: If one of the keys above is missing from ``params``.
    """
    ########## [reward_wheels] all_wheels_on_track ##########
    reward_wheels = 1 if params['all_wheels_on_track'] else 0

    ############## [reward_speed] speed #############
    # NOTE(review): the divisor 4 presumably matches the configured top
    # speed so that this term peaks at 1 -- confirm against the action space.
    reward_speed = params['speed'] / 4

    ############## [reward_direction] direction_diff #############
    waypoints = params['waypoints']
    closest_waypoints = params['closest_waypoints']
    heading = params['heading']

    # Bearing, in degrees, of the segment joining the two closest waypoints.
    next_point = waypoints[closest_waypoints[1]]
    prev_point = waypoints[closest_waypoints[0]]
    track_direction = math.degrees(
        math.atan2(next_point[1] - prev_point[1], next_point[0] - prev_point[0])
    )

    # Smallest angle between that bearing and the heading: the raw difference
    # can reach 360, so anything past 180 is folded back into 0-180.
    direction_diff = abs(track_direction - heading)
    if direction_diff > 180:
        direction_diff = 360 - direction_diff

    # cos() maps a 0 degree gap to 1, 90 degrees to 0 and 180 degrees to -1.
    reward_direction = math.cos(math.radians(direction_diff))

    ########### [reward weight sum] ############
    # The multipliers are kept explicit so each term can be re-weighted here.
    reward = 1 * reward_wheels + 1 * reward_speed + 1 * reward_direction

    return float(reward)

# Sample 2

import math

def reward_function(params):
    """Reward staying near the centre line and pointing at upcoming waypoints.

    Two terms are added together:

    * position: 0.1 when ``all_wheels_on_track`` is falsy; otherwise 1 if
      ``distance_from_center`` is within a quarter of ``track_width``,
      else 0.5;
    * direction: the mean, over seven consecutive waypoints starting at
      ``closest_waypoints[1]``, of the cosine of the angle between the
      heading and the bearing from ``(x, y)`` to that waypoint.

    Args:
        params: Mapping providing ``all_wheels_on_track``,
            ``distance_from_center``, ``track_width``, ``waypoints``,
            ``closest_waypoints``, ``heading``, ``x`` and ``y``.

    Returns:
        The sum of the two terms as a ``float``.
    """
    lookahead = 7

    ############## [position] wheels / distance from centre #############
    on_track = params['all_wheels_on_track']
    center_offset = params['distance_from_center']
    center_band = 0.25 * params['track_width']

    if not on_track:
        position_reward = 0.1
    elif center_offset <= center_band:
        position_reward = 1
    else:
        position_reward = 0.5

    ############## [direction] heading vs. upcoming waypoints #############
    track_points = params['waypoints']
    nearest = params['closest_waypoints']
    car_heading = params['heading']
    car_x = params['x']
    car_y = params['y']

    # Indices wrap at len - 1, so the final waypoint is never selected.
    # NOTE(review): presumably the last waypoint repeats the first on a
    # closed track -- confirm against the track data.
    wrap_at = len(track_points) - 1

    alignment_total = 0
    for step in range(lookahead):
        target = track_points[(nearest[1] + step) % wrap_at]

        # Bearing from the car's position to the target waypoint, in degrees.
        bearing = math.degrees(
            math.atan2(target[1] - car_y, target[0] - car_x)
        )

        # Fold the raw gap into 0-180 so it is the smallest angle between
        # the bearing and the heading.
        gap = abs(bearing - car_heading)
        if gap > 180:
            gap = 360 - gap

        alignment_total += math.cos(math.radians(gap))

    direction_reward = alignment_total / lookahead

    ########### [reward weight sum] ############
    return float(position_reward + direction_reward)