[teeworlds] Investigating high CPU usage

  • From: Bruno Tarquini <btarquini@xxxxxxxxx>
  • To: teeworlds@xxxxxxxxxxxxx
  • Date: Sat, 14 Feb 2009 00:37:25 +0100

Hi!  

    When I play teeworlds, my CPU usage is always at 100%, EVEN with VSYNC
set. It was at 100% even in the settings pages, which seemed weird. So I
looked at the code, did some profiling... and found that 75% of the time
was spent in libGLcore:

4925733  75.7125  libGLcore.so.173.14.15   (no symbols)
105697    1.6246  nvidia                   (no symbols)
93014     1.4297  libm-2.6.1.so            cosf
82255     1.2643  teeworlds                gfx_text_ex
74637     1.1472  zero                     (no symbols)
69720     1.0717  libm-2.6.1.so            sinf

Finally, I disabled gfx_finish and my CPU became happy :)

Here are some test results:

in video settings page:
-----------------------

vsync 0 finish 0 => fps 125 cpu 85%
vsync 0 finish 1 => fps 100 cpu 90%
vsync 1 finish 0 => fps  60 cpu 25%  <-- here
vsync 1 finish 1 => fps  60 cpu 95%

in game:
--------

vsync 0 finish 0 => fps 150 cpu 85%
vsync 0 finish 1 => fps 110 cpu 90% <-- finish reduces my fps!
vsync 1 finish 0 => fps  60 cpu 30%
vsync 1 finish 1 => fps  60 cpu 95%

    I googled a little and found that glFinish makes the CPU busy-wait
until the GPU has finished. In fact, calling glFinish after
SDL_GL_SwapBuffers() in ec_gfx.c:gfx_swap() seems weird, because
SDL_GL_SwapBuffers() should already flush the OpenGL command buffers.
Maybe this depends on the OpenGL implementation.
For me (Linux):
cpu: AMD Athlon(tm) XP 2800+
gpu: NVIDIA-173.14.15 geforce FX5700

    So IMHO, glFinish() should be removed, or disabled by default, or at
least disabled when vsync is activated. Another improvement would be to
reduce CPU usage when the player isn't playing, maybe by increasing
cpu_throttle.


During my investigations, I made some patches:

-------------------------------------------------
Make the CPU throttle adjustable.
The user can set cl_cpu_throttle to e.g. 10 (warning: this reduces
in-game performance!)

diff -r c4f5cef1bc45 src/engine/client/ec_client.c
--- a/src/engine/client/ec_client.c    Sun Feb 08 18:15:45 2009 +0100
+++ b/src/engine/client/ec_client.c    Thu Feb 12 11:30:38 2009 +0100
@@ -1816,8 +1816,10 @@
         /* be nice */
         if(config.dbg_stress)
             thread_sleep(5);
-        else if(config.cl_cpu_throttle || !gfx_window_active())
-            thread_sleep(1);
+        else if(config.cl_cpu_throttle)
+            thread_sleep(config.cl_cpu_throttle);
+        else if(!gfx_window_active())
+            thread_sleep(config.cl_cpu_throttle ?
config.cl_cpu_throttle : 1);
            
         if(config.dbg_hitch)
         {
diff -r c4f5cef1bc45 src/engine/e_config_variables.h
--- a/src/engine/e_config_variables.h    Sun Feb 08 18:15:45 2009 +0100
+++ b/src/engine/e_config_variables.h    Thu Feb 12 11:30:38 2009 +0100
@@ -9,7 +9,7 @@
 MACRO_CONFIG_STR(password, 32, "", CFGFLAG_CLIENT, "Password to the
server")
 MACRO_CONFIG_STR(logfile, 128, "", CFGFLAG_SAVE|CFGFLAG_CLIENT,
"Filename to log all output to")
 
-MACRO_CONFIG_INT(cl_cpu_throttle, 0, 0, 1, CFGFLAG_SAVE|CFGFLAG_CLIENT, "")
+MACRO_CONFIG_INT(cl_cpu_throttle, 0, 0, 100,
CFGFLAG_SAVE|CFGFLAG_CLIENT, "Adjust sleeping delay in event loop
(milliseconds)")
 MACRO_CONFIG_INT(cl_editor, 0, 0, 1, CFGFLAG_CLIENT, "")
 
 MACRO_CONFIG_INT(cl_eventthread, 0, 0, 1, CFGFLAG_CLIENT, "Enables the
usage of a thread to pump the events")




-------------------------------------------------
Optimize the rotate function (oprofile showed it accounts for 35% of CPU
usage in the settings page) by factoring out the cos & sin calculations.


diff -r f43708f5c3f9 src/engine/client/ec_gfx.c
--- a/src/engine/client/ec_gfx.c    Thu Feb 12 11:30:38 2009 +0100
+++ b/src/engine/client/ec_gfx.c    Thu Feb 12 11:54:19 2009 +0100
@@ -837,8 +837,10 @@
 {
     float x = point->x - center->x;
     float y = point->y - center->y;
-    point->x = x * cosf(rotation) - y * sinf(rotation) + center->x;
-    point->y = x * sinf(rotation) + y * cosf(rotation) + center->y;
+    float c = cosf(rotation);
+    float s = sinf(rotation);
+    point->x = x * c - y * s + center->x;
+    point->y = x * s + y * c + center->y;
 }
 
 void gfx_quads_draw(float x, float y, float w, float h)
diff -r f43708f5c3f9 src/game/client/render_map.cpp
--- a/src/game/client/render_map.cpp    Thu Feb 12 11:30:38 2009 +0100
+++ b/src/game/client/render_map.cpp    Thu Feb 12 11:54:19 2009 +0100
@@ -73,8 +73,10 @@
 {
     int x = point->x - center->x;
     int y = point->y - center->y;
-    point->x = (int)(x * cosf(rotation) - y * sinf(rotation) + center->x);
-    point->y = (int)(x * sinf(rotation) + y * cosf(rotation) + center->y);
+    float c = cosf(rotation);
+    float s = sinf(rotation);
+    point->x = (int)(x * c - y * s + center->x);
+    point->y = (int)(x * s + y * c + center->y);
 }
 
 void render_quads(QUAD *quads, int num_quads, void (*eval)(float
time_offset, int env, float *channels), int renderflags)



-------------------------------------------------
Introduce rotate4, which rotates 4 vectors at the same time.
This saves some trigonometric calculations.


diff -r f7a250eb5301 src/engine/client/ec_gfx.c
--- a/src/engine/client/ec_gfx.c    Thu Feb 12 11:55:26 2009 +0100
+++ b/src/engine/client/ec_gfx.c    Thu Feb 12 12:29:25 2009 +0100
@@ -843,6 +843,27 @@
     point->y = x * s + y * c + center->y;
 }
 
+static void rotate4(VEC3 *center, VEC3 *point1, VEC3 *point2, VEC3
*point3, VEC3 *point4)
+{
+    float x, y;
+    float c = cosf(rotation);
+    float s = sinf(rotation);
+
+#define ROT_POINT(POINT) \
+    x = (POINT)->x - center->x; \
+    y = (POINT)->y - center->y; \
+    (POINT)->x = x * c - y * s + center->x; \
+    (POINT)->y = x * s + y * c + center->y
+
+    ROT_POINT(point1);
+    ROT_POINT(point2);
+    ROT_POINT(point3);
+    ROT_POINT(point4);
+
+#undef ROT_POINT
+}
+
+
 void gfx_quads_draw(float x, float y, float w, float h)
 {
     gfx_quads_drawTL(x-w/2, y-h/2,w,h);
@@ -862,26 +883,27 @@
     vertices[num_vertices].pos.y = y;
     vertices[num_vertices].tex = texture[0];
     vertices[num_vertices].color = color[0];
-    rotate(&center, &vertices[num_vertices].pos);
 
     vertices[num_vertices + 1].pos.x = x+width;
     vertices[num_vertices + 1].pos.y = y;
     vertices[num_vertices + 1].tex = texture[1];
     vertices[num_vertices + 1].color = color[1];
-    rotate(&center, &vertices[num_vertices + 1].pos);
 
     vertices[num_vertices + 2].pos.x = x + width;
     vertices[num_vertices + 2].pos.y = y+height;
     vertices[num_vertices + 2].tex = texture[2];
     vertices[num_vertices + 2].color = color[2];
-    rotate(&center, &vertices[num_vertices + 2].pos);
 
     vertices[num_vertices + 3].pos.x = x;
     vertices[num_vertices + 3].pos.y = y+height;
     vertices[num_vertices + 3].tex = texture[3];
     vertices[num_vertices + 3].color = color[3];
-    rotate(&center, &vertices[num_vertices + 3].pos);
-   
+
+    rotate4(&center, &vertices[num_vertices].pos,
+             &vertices[num_vertices + 1].pos,
+             &vertices[num_vertices + 2].pos,
+             &vertices[num_vertices + 3].pos);
+
     add_vertices(4);
 }
 
diff -r f7a250eb5301 src/game/client/render_map.cpp
--- a/src/game/client/render_map.cpp    Thu Feb 12 11:55:26 2009 +0100
+++ b/src/game/client/render_map.cpp    Thu Feb 12 12:29:25 2009 +0100
@@ -77,6 +77,26 @@
     float s = sinf(rotation);
     point->x = (int)(x * c - y * s + center->x);
     point->y = (int)(x * s + y * c + center->y);
+}
+
+static void rotate4(POINT *center, POINT points[], float rotation)
+{
+    int x, y;
+    float c = cosf(rotation);
+    float s = sinf(rotation);
+
+#define ROT_POINT(POINT) \
+    x = (POINT)->x - center->x; \
+    y = (POINT)->y - center->y; \
+    (POINT)->x = (int)(x * c - y * s + center->x); \
+    (POINT)->y = (int)(x * s + y * c + center->y)
+
+    ROT_POINT(points);
+    ROT_POINT(points + 1);
+    ROT_POINT(points + 2);
+    ROT_POINT(points + 3);
+
+#undef ROT_POINT
 }
 
 void render_quads(QUAD *quads, int num_quads, void (*eval)(float
time_offset, int env, float *channels), int renderflags)
@@ -146,10 +166,7 @@
             rotated[3] = q->points[3];
             points = rotated;
            
-            rotate(&q->points[4], &rotated[0], rot);
-            rotate(&q->points[4], &rotated[1], rot);
-            rotate(&q->points[4], &rotated[2], rot);
-            rotate(&q->points[4], &rotated[3], rot);
+            rotate4(&q->points[4], rotated, rot);
         }
        
         gfx_quads_draw_freeform(

Regards,

Bruno



Other related posts: