[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[GNUnet-SVN] r29169 - gnunet/src/ats
From: |
gnunet |
Subject: |
[GNUnet-SVN] r29169 - gnunet/src/ats |
Date: |
Tue, 10 Sep 2013 18:40:41 +0200 |
Author: oehlmann
Date: 2013-09-10 18:40:41 +0200 (Tue, 10 Sep 2013)
New Revision: 29169
Modified:
gnunet/src/ats/gnunet-service-ats-solver_ril.c
Log:
ats_ril: solver continued
Modified: gnunet/src/ats/gnunet-service-ats-solver_ril.c
===================================================================
--- gnunet/src/ats/gnunet-service-ats-solver_ril.c 2013-09-10 15:22:11 UTC
(rev 29168)
+++ gnunet/src/ats/gnunet-service-ats-solver_ril.c 2013-09-10 16:40:41 UTC
(rev 29169)
@@ -25,6 +25,7 @@
* @author Matthias Wachs
*/
#include "platform.h"
+#include "float.h"
#include "gnunet_util_lib.h"
#include "gnunet-service-ats_addresses.h"
#include "gnunet_statistics_service.h"
@@ -33,6 +34,7 @@
#define RIL_DEFAULT_DISCOUNT_FACTOR 0.5
#define RIL_DEFAULT_GRADIENT_STEP_SIZE 0.4
#define RIL_DEFAULT_TRACE_DECAY 0.6
+#define RIL_EXPLORE_RATIO 0.1
/**
* ATS reinforcement learning solver
@@ -40,6 +42,14 @@
* General description
*/
+/**
+ * Actions an agent can choose between. The numeric values are used as
+ * plain integer action indices elsewhere in this file.
+ */
+enum RIL_Action
+{
+  /* presumably "double the bandwidth" -- TODO confirm */
+  RIL_BW_DBL = 0,
+  /* presumably "halve the bandwidth" -- TODO confirm */
+  RIL_BW_HLV = 1,
+  /* count of the actions listed above; not an action itself */
+  RIL_NUM_ACTIONS = 2
+};
+/* TODO add the rest of the actions */
+
/**
* Global learning parameters
*/
@@ -74,29 +84,59 @@
struct RIL_Peer_Agent *prev;
/**
+ * Environment handle
+ */
+ struct GAS_RIL_Handle *envi;
+
+ /**
* Peer ID
*/
struct GNUNET_PeerIdentity peer;
/**
+ * Whether the agent is active or not
+ */
+ int active;
+
+ /**
+ * Number of performed time-steps
+ */
+ unsigned long long step_count;
+
+ /**
* Experience matrix W
*/
double ** W;
/**
+ * Number of rows of W / Number of state-vector features
+ */
+ int m;
+
+ /**
+ * Number of columns of W / Number of actions
+ */
+ int n;
+
+ /**
* Last perceived state feature vector
*/
- double * s_t;
+ double * s_old;
/**
* Last chosen action
*/
- double * a_t;
+ int a_old;
/**
* Last eligibility trace vector
*/
double * e_t;
+
+ /**
+ * Address in use
+ */
+ struct ATS_Address * address;
};
struct RIL_Network
@@ -228,66 +268,225 @@
/**
* List of active peer-agents
*/
- struct RIL_Peer_Agent * agents_active_head;
- struct RIL_Peer_Agent * agents_active_tail;
-
- /**
- * List of paused peer-agents
- */
- struct RIL_Peer_Agent * agents_paused_head;
- struct RIL_Peer_Agent * agents_paused_tail;
+ struct RIL_Peer_Agent * agents_head;
+ struct RIL_Peer_Agent * agents_tail;
};
-enum Actions
-{
- bw_dbl,
- bw_hlv
-};
-//TODO add the rest of the actions
+
/**
* Private functions
* ---------------------------
*/
+/**
+ * Estimate the current action-value for state s and action a.
+ *
+ * The estimate is the sum over all state features i of
+ * state[i] * W[i][action], i.e. W is read with one row per feature and
+ * one column per action.
+ *
+ * @param agent agent performing the estimation
+ * @param state s, read at indices 0 .. agent->m - 1
+ * @param action a, used as column index into agent->W
+ * @return estimation value
+ */
+double
+agent_estimate_q (struct RIL_Peer_Agent *agent,
+                  double *state,
+                  int action)
+{
+  int i;
+  double result = 0;
+
+  for (i = 0; i < agent->m; i++)
+  {
+    /* Row i holds the weights of feature i. Using agent->m as the row
+     * index would read the same row on every iteration, and that row
+     * lies one past the m rows the loop is meant to cover. */
+    result += state[i] * (agent->W)[i][action];
+  }
+
+  return result;
+}
+
+/**
+ * Choose the next action for an agent in a given state.
+ *
+ * If a random draw falls below RIL_EXPLORE_RATIO a random action index
+ * is returned; otherwise the action with the highest estimated
+ * action-value (see agent_estimate_q) is returned.
+ *
+ * @param agent agent making the choice
+ * @param state state feature vector the choice is based on
+ * @return index of the chosen action
+ */
+int
+agent_choose_action (struct RIL_Peer_Agent *agent,
+                     double *state)
+{
+  int i;
+  int max_i = -1;
+  double r;
+  double cur_q;
+  /* DBL_MIN is the smallest positive normalized double, not the most
+   * negative one: starting from it, estimates <= 0 would never be
+   * selected and the assertion below would fire. */
+  double max_q = -DBL_MAX;
+
+  r = ((double) GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK,
+                                          UINT32_MAX) / (double) UINT32_MAX);
+
+  if (r < RIL_EXPLORE_RATIO)
+  {
+    return GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK, agent->n);
+  }
+
+  /* Iterate over actions. Per the field documentation of
+   * struct RIL_Peer_Agent, n is the number of actions (m counts the
+   * state features), which also matches the random draw above. */
+  for (i = 0; i < agent->n; i++)
+  {
+    cur_q = agent_estimate_q (agent, state, i);
+    if (cur_q > max_q)
+    {
+      max_q = cur_q;
+      max_i = i;
+    }
+  }
+
+  GNUNET_assert (-1 != max_i);
+
+  return max_i;
+}
+
+/**
+ * Build the state feature vector from the solver's networks.
+ *
+ * Each network contributes four consecutive values: assigned and
+ * available inbound bandwidth, then assigned and available outbound
+ * bandwidth.
+ *
+ * @param s solver handle (struct GAS_RIL_Handle *)
+ * @return newly allocated array of solver->networks_count * 4 doubles;
+ *         the caller is responsible for releasing it
+ */
+double *
+envi_get_state (void *s)
+{
+  int i;
+  struct GAS_RIL_Handle *solver = s;
+  struct RIL_Network *net;
+  double *state = GNUNET_malloc (sizeof (double) * solver->networks_count * 4);
+
+  /* Visit every network once. i is the network index; the network's
+   * slot in the state vector starts at i * 4. Stepping i by 4 while also
+   * using it as the state index skipped three out of four networks and
+   * wrote past the end of the vector. */
+  for (i = 0; i < solver->networks_count; i++)
+  {
+    /* Address of the i-th entry. This assumes network_entries refers to
+     * networks_count consecutive struct RIL_Network objects -- TODO
+     * confirm against the handle's declaration. */
+    net = &solver->network_entries[i];
+    state[i * 4 + 0] = (double) net->bw_in_assigned;
+    state[i * 4 + 1] = (double) net->bw_in_available;
+    state[i * 4 + 2] = (double) net->bw_out_assigned;
+    state[i * 4 + 3] = (double) net->bw_out_available;
+  }
+
+  return state;
+}
+
+/**
+ * Reward for the last time-step.
+ *
+ * Placeholder: no reward calculation is implemented yet. A weak random
+ * number divided by UINT32_MAX is returned instead.
+ *
+ * @return the (currently random) reward
+ */
+double
+envi_get_reward (void)
+{
+  //TODO implement
+  return (double) GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK,
+                                            UINT32_MAX) / (double) UINT32_MAX;
+}
+
+/**
+ * Perform one time-step for an agent.
+ *
+ * Fetches the new state and reward, chooses the next action and, if a
+ * previous state exists, computes
+ *   delta = reward + gamma * Q(s_next, a_next) - Q(s_old, a_old).
+ * Afterwards the new state/action become the agent's "old" ones and the
+ * agent's step counter is incremented.
+ *
+ * @param agent the agent taking the step
+ */
void
-agent_periodic_step (void *solver,
+agent_step (struct RIL_Peer_Agent *agent)
+{
+  int a_next;
+  double *s_next;
+  double reward;
+  double delta;
+  double q_next;
+
+  s_next = envi_get_state (agent->envi);
+  reward = envi_get_reward ();
+
+  a_next = agent_choose_action (agent, s_next);
+  q_next = agent_estimate_q (agent, s_next, a_next);
+
+  if (NULL != agent->s_old)
+  {
+    delta = reward +
+            (agent->envi->parameters.gamma * q_next) -
+            agent_estimate_q (agent, agent->s_old, agent->a_old);
+    /* TODO delta is not yet used to update W */
+    (void) delta;
+
+    /* Release the previous state only when there is one: s_old is NULL
+     * until the first step has completed, and GNUNET_free is not meant
+     * to be handed NULL in this GNUnet version (GNUNET_free_non_null
+     * exists for that case). */
+    GNUNET_free (agent->s_old);
+  }
+
+  agent->s_old = s_next;
+  agent->a_old = a_next;
+
+  agent->step_count += 1;
+}
+
+/**
+ * Scheduler task of the solver: lets every agent whose active flag is
+ * set perform one time-step, increments the solver's step counter and
+ * schedules itself again after solver->step_time.
+ *
+ * @param s the solver handle (struct GAS_RIL_Handle *)
+ * @param tc scheduler task context (not read by this function)
+ */
+void
+ril_periodic_step (void *s,
const struct GNUNET_SCHEDULER_TaskContext *tc)
{
/*
* iterate over active agents and do a time step
*/
- struct GAS_RIL_Handle *s = solver;
+ struct GAS_RIL_Handle *solver = s;
+ struct RIL_Peer_Agent *cur;
- GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "RIL step number %d\n",
s->step_count);
+ GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "RIL step number %d\n",
solver->step_count);
- s->step_count += 1;
- s->next_step = GNUNET_SCHEDULER_add_delayed (
- s->step_time,
- &agent_periodic_step,
+ for (cur = solver->agents_head; NULL != cur; cur = cur->next)
+ {
+ if (cur->active)
+ {
+ agent_step (cur);
+ }
+ }
+
+ solver->step_count += 1;
+ solver->next_step = GNUNET_SCHEDULER_add_delayed (
+ solver->step_time,
+ &ril_periodic_step,
solver);
}
/**
- * Whether a peer already has an agent in a list
- * @param head of list
- * @param peer in question
+ * Initialize an agent without addresses and its knowledge base
+ * @param s ril solver
+ * @param peer the one in question
+ * @return handle to the new agent
*/
-int
-list_contains_agent (struct RIL_Peer_Agent * head,
- struct GNUNET_PeerIdentity * peer)
+struct RIL_Peer_Agent *
+agent_init (void *s,
+            struct GNUNET_PeerIdentity peer)
{
+  int i;
+  struct GAS_RIL_Handle *solver = s;
+  struct RIL_Peer_Agent *agent = GNUNET_malloc (sizeof (struct RIL_Peer_Agent));
+
+  agent->envi = solver;
+  agent->peer = peer;
+  agent->step_count = 0;
+  /* A new agent starts inactive and without a previous state/action. */
+  agent->active = GNUNET_NO;
+  agent->s_old = NULL;
+  agent->a_old = -1;
+  agent->e_t = NULL;
+  agent->address = NULL;
+
+  /* Dimensions of W as documented on the struct fields: m rows, one per
+   * state feature (four per network, the size envi_get_state allocates),
+   * and n columns, one per action. */
+  agent->m = solver->networks_count * 4;
+  agent->n = RIL_NUM_ACTIONS;
+
+  /* The outer array holds m row pointers, so it is sized with
+   * sizeof (double *); each row then holds n doubles. */
+  agent->W = GNUNET_malloc (sizeof (double *) * agent->m);
+  for (i = 0; i < agent->m; i++)
+  {
+    (agent->W)[i] = GNUNET_malloc (sizeof (double) * agent->n);
+  }
+
+  /* Register the agent in the solver's agent list. */
+  GNUNET_CONTAINER_DLL_insert (solver->agents_head, solver->agents_tail, agent);
+
+  return agent;
+}
+
+/**
+ * Deallocate agent
+ *
+ * Not implemented yet: the body is empty, so nothing is released and
+ * the agent stays in the solver's agent list.
+ *
+ * @param s solver handle
+ * @param agent the agent to retire
+ */
+void
+agent_die (void *s,
+ struct RIL_Peer_Agent * agent)
+{
+
+}
+
+/**
+ * Returns the agent for a peer, creating one if none exists yet.
+ *
+ * Walks the solver's agent list and compares the peers' hashPubKey
+ * values; if no agent matches, a new one is created with agent_init().
+ *
+ * @param s solver handle
+ * @param peer identity of the peer
+ * @return the agent found in the list, or the result of agent_init()
+ */
+struct RIL_Peer_Agent *
+ril_get_agent (struct GAS_RIL_Handle * s,
+ struct GNUNET_PeerIdentity peer)
+{
+ struct GAS_RIL_Handle * solver = s;
struct RIL_Peer_Agent * cur;
- for (cur = head; NULL != cur; cur = cur->next)
+ for (cur = s->agents_head; NULL != cur; cur = cur->next)
{
- if (!memcmp (&(cur->peer.hashPubKey), &peer->hashPubKey,
sizeof(struct GNUNET_HashCode)))
+ if (0 == GNUNET_CRYPTO_hash_cmp (&peer.hashPubKey,
&cur->peer.hashPubKey))
{
- return GNUNET_YES;
+ return cur;
}
}
- return GNUNET_NO;
+
+ return agent_init (solver, peer);
}
/**
@@ -307,14 +506,15 @@
struct ATS_Address *address = value;
struct RIL_Peer_Agent *agent;
- if (!list_contains_agent (solver->agents_paused_head, &address->peer))
+ agent = ril_get_agent (solver, address->peer);
+
+ GNUNET_assert (agent != NULL);
+
+ if (NULL == agent->address)
{
- agent = GNUNET_malloc (sizeof (struct RIL_Peer_Agent));
- agent->peer = address->peer;
- GNUNET_CONTAINER_DLL_insert (solver->agents_paused_head,
solver->agents_paused_tail, agent);
+ agent->address = address;
}
- //TODO add address to agent
return GNUNET_YES;
}
@@ -339,13 +539,14 @@
enum GNUNET_ATS_PreferenceKind kind,
double pref_rel)
{
- //TODO implement
-
- /*
- * Probably nothing to do here. The preference is looked up during
reward calculation and does
- * not trigger anything
- */
- GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "ril_address_change_preference()
has been called\n");
+ GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
+ "Preference `%s' for peer `%s' changed to %.2f \n",
+ GNUNET_ATS_print_preference_type (kind),
+ GNUNET_i2s (peer),
+ pref_rel);
+ /*
+ * Nothing to do here. Preferences are considered during reward
calculation.
+ */
}
@@ -467,7 +668,7 @@
solver->next_step = GNUNET_SCHEDULER_add_delayed (
GNUNET_TIME_relative_multiply
(GNUNET_TIME_relative_get_millisecond_ (), 1000),
- &agent_periodic_step,
+ &ril_periodic_step,
solver);
return solver;
@@ -525,7 +726,8 @@
*/
void
GAS_ril_address_delete (void *solver,
- struct ATS_Address *address, int session_only)
+ struct ATS_Address *address,
+ int session_only)
{
//TODO implement
/*
@@ -556,12 +758,14 @@
uint32_t abs_value,
double rel_value)
{
- //TODO implement
- /*
- * Like change_preference() not really interesting, since lookup
happens anyway during reward
- * calculation
- */
- GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "ril_address_property_changed()
has been called\n");
+ GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
+ "Property `%s' for peer `%s' address %p changed to %.2f
\n",
+ GNUNET_ATS_print_property_type (type),
+ GNUNET_i2s (&address->peer),
+ address, rel_value);
+ /*
+ * Nothing to do here, properties are considered in every reward
calculation
+ */
}
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [GNUnet-SVN] r29169 - gnunet/src/ats,
gnunet <=