周期性回调抛出错误:ValueError('Must stream updates to all existing columns (missing: index)')
问题描述:
我想弄清楚如何从 pandas DataFrame 中流式传输(stream)数据,但周期性回调抛出了错误:ValueError('Must stream updates to all existing columns (missing: index)')。我的代码如下:
def modify_doc(doc):
    """Populate ``doc`` with a datetime line plot fed by a streaming source.

    Reads the CSV referenced by the module-level ``data``, parses the
    ``Date`` column, plots three of the numeric columns against it and
    registers a periodic callback on ``doc`` that streams one row into
    the plot's data source.

    Args:
        doc: The Bokeh document to which the plot root and the periodic
            callback are added.
    """
    full_frame = pd.read_csv(data)
    full_frame['Date'] = pd.to_datetime(full_frame['Date'])

    # startup using most of the data and stream the rest
    initial_frame = full_frame[0:-1]
    source = ColumnDataSource(initial_frame)

    fig = figure(
        x_axis_type='datetime',
        y_range=(0, 10000000),
        y_axis_label='Y Label',
        title="Title",
    )

    # One line glyph per column, all sharing the same source and x values.
    series_colors = (
        ('ALL_EXCL_FUEL', 'blue'),
        ('MOSTLY_FOOD', 'lightblue'),
        ('NON_SPECIALISED', 'grey'),
    )
    for column_name, line_color in series_colors:
        fig.line('Date', column_name, color=line_color, alpha=1, source=source)

    def callback():
        # hardcode update values for now
        # The update is handed over as a one-row DataFrame slice, not a dict.
        source.stream(initial_frame[-1:])

    doc.add_root(fig)
    doc.add_periodic_callback(callback, 50)


# Wrap the document builder so it can be served as a Bokeh application.
bokeh_app = Application(FunctionHandler(modify_doc))
然而,这导致错误:
Error thrown from periodic callback: ValueError('Must stream updates to all existing columns (missing: index)',)
我可以打印出source.data.keys:
# Build the data source straight from the DataFrame, then list the column
# names it ended up holding.
source = ColumnDataSource(df)
print(source.data.keys())
dict_keys(['Date', 'ALL_EXCL_FUEL', 'MOSTLY_FOOD', 'NON_SPECIALISED', 'TEXTILE', 'HOUSEHOLD', 'OTHER', 'NON_STORE', 'index'])
看起来 ColumnDataSource 在内部使用了 DataFrame 的索引?其他人也遇到过这个问题:https://github.com/bokeh/bokeh/issues/4797,不过该 issue 已被关闭。
下面附上一个最小、完整、可验证的示例,用于复现我的问题:
bokeh_server.py
import pandas as pd
from tornado.ioloop import IOLoop
import yaml
from jinja2 import Template
from bokeh.application.handlers import FunctionHandler
from bokeh.application import Application
from bokeh.layouts import column
from bokeh.models import ColumnDataSource, Slider, Div
from bokeh.plotting import figure
from bokeh.server.server import Server
from bokeh.themes import Theme
import os
# If running locally, listen on port 5000; otherwise use the PORT env var.
PORT = int(os.getenv('PORT', '5000'))
HOST = "0.0.0.0"

# Allowed websocket origins come from the comma-separated
# ALLOW_WEBSOCKET_ORIGIN environment variable (set in the Cloud Foundry
# manifest). When the variable is unset or empty, fall back to the local
# address. An explicit check replaces the former bare ``except:``, which
# relied on ``None.split`` raising and would have hidden any other error.
_origins_env = os.getenv("ALLOW_WEBSOCKET_ORIGIN")
if _origins_env:
    ALLOW_WEBSOCKET_ORIGIN = _origins_env.split(',')
else:
    ALLOW_WEBSOCKET_ORIGIN = ['localhost:{0}'.format(PORT)]
print('ALLOW_WEBSOCKET_ORIGIN', ALLOW_WEBSOCKET_ORIGIN)
# Tornado IOLoop that is passed to the Bokeh Server and started in the
# __main__ guard further down.
io_loop = IOLoop.current()
import io
# Inline CSV sample wrapped in a file-like object so that pd.read_csv can
# consume it without an external file.
# NOTE(review): this buffer is created once at module level; if modify_doc
# runs more than once, later read_csv calls would presumably start at the
# end of the buffer -- confirm, and rewind or rebuild it if so.
data = io.StringIO("""Date,ALL_EXCL_FUEL,MOSTLY_FOOD,NON_SPECIALISED,TEXTILE,HOUSEHOLD,OTHER,NON_STORE
1986 Jan,1883154,747432,163708,267774,261453,281699,161088
1986 Feb,1819796,773161,152656,223836,246502,275121,148519
1986 Mar,1912582,797104,169440,251438,249614,292348,152638
1986 Apr,1974419,809334,170540,275975,260086,299271,159213
1986 May,1948915,800193,170173,274979,251175,297655,154740
1986 Jun,2019114,821785,178366,295463,251507,311447,160546
1986 Jul,2051539,816033,184812,297969,269786,323187,159752
1986 Aug,2011746,804386,180911,297138,263427,310220,155665
1986 Sep,2046678,792943,181055,305350,280640,318368,168322
1986 Oct,2110669,810147,187728,308919,298637,325617,179621
1986 Nov,2315710,847794,231599,352009,332079,358077,194152
1986 Dec,2830206,970987,319570,490001,373714,469399,206536
1987 Jan,2032021,798562,172215,288186,288534,307900,176624
1987 Feb,1980748,805713,165682,247219,282836,313577,165721
1987 Mar,2009717,816051,174034,256756,280207,315562,167106
1987 Apr,2156967,862749,189729,308543,284440,336755,174751
1987 May,2075808,834375,175464,287515,280404,330093,167957
1987 Jun,2137092,844051,183014,304706,286522,345149,173651
1987 Jul,2208377,847098,198848,330804,301537,356037,174054
1987 Aug,2193689,854672,186160,317375,304843,356241,174399
1987 Sep,2177927,825398,188343,317164,314681,350923,181418
1987 Oct,2281593,850022,202862,340464,334112,355424,198710
1987 Nov,2506843,892292,248366,381103,371953,397845,215285
1987 Dec,3075829,1028966,346378,533443,422524,519848,224669
1988 Jan,2267165,845068,193734,316077,354371,364295,193620
1988 Feb,2164201,864420,178627,267003,324824,351326,178001
1988 Mar,2227296,893751,192979,283258,319268,356518,181522
1988 Apr,2309954,899831,195328,312896,330680,379170,192049
1988 May,2321889,904736,193670,322577,325868,385344,189694
1988 Jun,2331091,900316,199227,330852,323326,387613,189757
1988 Jul,2443590,907775,212694,356501,363880,406913,195827
1988 Aug,2410116,913793,204410,339444,355879,405094,191497
""")
def modify_doc(doc):
    """Populate ``doc`` with a datetime line plot fed by a streaming source.

    Parses the module-level CSV buffer ``data``, converts the ``Date``
    column to datetimes, plots three of the numeric columns against it and
    registers a periodic callback on ``doc`` that streams one row into the
    plot's data source.

    Args:
        doc: The Bokeh document to which the plot root and the periodic
            callback are added.
    """
    full_frame = pd.read_csv(data)
    full_frame['Date'] = pd.to_datetime(full_frame['Date'])

    # Everything except the final row seeds the data source.
    initial_frame = full_frame[0:-1]
    source = ColumnDataSource(initial_frame)

    fig = figure(
        x_axis_type='datetime',
        y_range=(0, 10000000),
        y_axis_label='Y Label',
        title="Title",
    )

    # One line glyph per column, all sharing the same source and x values.
    series_colors = (
        ('ALL_EXCL_FUEL', 'blue'),
        ('MOSTLY_FOOD', 'lightblue'),
        ('NON_SPECIALISED', 'grey'),
    )
    for column_name, line_color in series_colors:
        fig.line('Date', column_name, color=line_color, alpha=1, source=source)

    def callback():
        # hardcode update values for now
        # The update is handed over as a one-row DataFrame slice, not a dict.
        source.stream(initial_frame[-1:])

    doc.add_root(fig)
    doc.add_periodic_callback(callback, 50)
# Wrap the document builder so it can be served as a Bokeh application.
bokeh_app = Application(FunctionHandler(modify_doc))

# Serve the application at the root path on the shared IOLoop, bound to
# the configured host/port and restricted to the allowed websocket origins.
server = Server(
    {'/': bokeh_app},
    io_loop=io_loop,
    allow_websocket_origin=ALLOW_WEBSOCKET_ORIGIN,
    port=PORT,
    address=HOST,
)
server.start()

if __name__ == '__main__':
    # Schedule server.show("/") on the loop, then run the loop.
    io_loop.add_callback(server.show, "/")
    io_loop.start()
运行
python bokeh_server.py
答
我的解决方案是将 DataFrame 转换为字典:
def modify_doc(doc):
    """Sketch of the workaround: feed the source dicts, not DataFrames.

    Both the initial data and every streamed update are converted with
    ``DataFrame.to_dict(orient='list')`` before they reach the
    ``ColumnDataSource``. The ``...`` lines are placeholders left for the
    reader to fill in.
    """
    df_all = ...

    # Seed the source with a plain {column: list_of_values} mapping.
    initial_columns = df_all.to_dict(orient='list')
    source = ColumnDataSource(data=initial_columns)
    ...

    def callback():
        ...
        # Convert the update the same way before streaming it.
        update_columns = df_new.to_dict(orient='list')
        source.stream(update_columns)
截至 0.12.6 版本,`.stream` 方法只接受 `dict`,所以这是正确的答案。或许它也可以支持直接接受 DataFrame,欢迎在 GitHub 上提交 issue:https://github.com/bokeh/bokeh/issues – bigreddot