Skip to content

SystemError: Negative size passed to PyBytes_FromStringAndSize #78

@SunSeaLucky

Description

@SunSeaLucky

Environment: macOS 15.3.2, Python 3.11, wbdata 1.1.0. After calling wbdata.get_data(indicator=indicators), the error below occurs at the import statement.

This can be worked around by running rm -rf ~/Library/Caches/wbdata.

However, I have to do this every time after calling wbdata.get_data(indicator=indicators).

Traceback (most recent call last):
  File "/Users/sunsealucky/Desktop/DataSphere/Program/DataSphere.py", line 3, in <module>
    from WorldBank import WorldBank
  File "/Users/sunsealucky/Desktop/DataSphere/Program/WorldBank.py", line 1, in <module>
    import wbdata
  File "/Users/sunsealucky/miniconda3/envs/datasphere/lib/python3.11/site-packages/wbdata/__init__.py", line 19, in <module>
    get_data = get_default_client().get_data
               ^^^^^^^^^^^^^^^^^^^^
  File "/Users/sunsealucky/miniconda3/envs/datasphere/lib/python3.11/site-packages/wbdata/__init__.py", line 16, in get_default_client
    return Client()
           ^^^^^^^^
  File "<string>", line 7, in __init__
  File "/Users/sunsealucky/miniconda3/envs/datasphere/lib/python3.11/site-packages/wbdata/client.py", line 174, in __post_init__
    cache=cache.get_cache(
          ^^^^^^^^^^^^^^^^
  File "/Users/sunsealucky/miniconda3/envs/datasphere/lib/python3.11/site-packages/wbdata/cache.py", line 79, in get_cache
    cache.expire()
    ^^^^^^^^^^^^
  File "/Users/sunsealucky/miniconda3/envs/datasphere/lib/python3.11/site-packages/shelved_cache/persistent_cache.py", line 106, in __getattr__
    self.initialize_if_not_initialized()
  File "/Users/sunsealucky/miniconda3/envs/datasphere/lib/python3.11/site-packages/shelved_cache/persistent_cache.py", line 150, in initialize_if_not_initialized
    raise e
  File "/Users/sunsealucky/miniconda3/envs/datasphere/lib/python3.11/site-packages/shelved_cache/persistent_cache.py", line 128, in initialize_if_not_initialized
    for hk, (k, v) in self.persistent_dict.items():
  File "<frozen _collections_abc>", line 860, in __iter__
  File "/Users/sunsealucky/miniconda3/envs/datasphere/lib/python3.11/shelve.py", line 95, in __iter__
    for k in self.dict.keys():
             ^^^^^^^^^^^^^^^^
SystemError: Negative size passed to PyBytes_FromStringAndSize

Here is a part of my project:

import wbdata
import pandas as pd
from LLM import LLM

class WorldBank:
    """LLM-assisted front end to the ``wbdata`` World Bank API client.

    A natural-language sentence is resolved in two steps: the LLM first picks
    a source from ``get_sources()``, then picks one indicator from that
    source's indicator list. The chosen indicator id is finally passed to
    ``get_data``.
    """

    def __init__(self) -> None:
        # ======================= Init World Bank api ====================== #
        # Expose the wbdata module-level helpers as instance attributes.
        self.get_data = wbdata.get_data
        self.get_series = wbdata.get_series
        self.get_dataframe = wbdata.get_dataframe
        self.get_countries = wbdata.get_countries
        self.get_indicators = wbdata.get_indicators
        self.get_incomelevels = wbdata.get_incomelevels
        self.get_lendingtypes = wbdata.get_lendingtypes
        self.get_sources = wbdata.get_sources
        self.get_topics = wbdata.get_topics
        self.llm = LLM()
        # Prompt templates are read relative to the current working directory.
        with open('prompt_pattern/api/resource_prompt.txt', encoding='utf-8') as f:
            self.resource_prompt = f.read()
        with open('prompt_pattern/api/indicator_prompt.txt', encoding='utf-8') as f:
            self.indicator_prompt = f.read()
        print("WordBank API initialized.")

    @staticmethod
    def _parse_index(reply: str, content: str) -> str:
        """Return the numeric index contained in an LLM reply.

        Surrounding whitespace (e.g. a trailing newline) is ignored.

        Args:
            reply: Raw text returned by the LLM.
            content: The query content that was sent, echoed in the error
                message to help diagnose a bad reply.

        Raises:
            ValueError: If the stripped reply is not made of digits only.
        """
        index = reply.strip()
        if not index.isdigit():
            raise ValueError(f'Invalid model output index: "{reply}". As the provided content is \n\n {content}')
        return index

    def query_resources(self, sentence: str) -> str:
        """Ask the LLM which source best matches *sentence*.

        Returns:
            The chosen source index as a digit string.

        Raises:
            ValueError: If the LLM reply is not a plain number.
        """
        # ===================== Construct query content ==================== #
        content = (
            f'\nBelow is query sentence: \n{sentence}'
            f'\nBelow is resources provided to choose from: \n{self.get_sources()}'
        )

        # ===================== Reason resources index ===================== #
        reply = self.llm.query(f"{self.resource_prompt}\n{content}")
        res = self._parse_index(reply, content)
        print(f'✅ Choose resource index: {res} ')
        return res

    def query_indicators(self, sentence: str) -> str:
        """Ask the LLM which indicator of the chosen source matches *sentence*.

        Returns:
            The ``id`` value of the selected indicator row.

        Raises:
            ValueError: If an LLM reply is not a plain number.
            KeyError: If the returned index is not a row label of the
                indicator table.
        """
        resources_index = self.query_resources(sentence=sentence)

        # ===================== Construct query content ==================== #
        df = pd.DataFrame(self.get_indicators(source=resources_index))
        # to_string() renders every row. str(df) would go through the repr,
        # which elides the middle rows once the frame exceeds pandas'
        # display.max_rows, hiding most candidates from the LLM.
        content = (
            f'\nBelow is query sentence: {sentence}'
            '\n\nBelow is indicators provided to choose from: \n'
            + df[['name']].to_string()
        )

        # ===================== Reason indicator index ===================== #
        reply = self.llm.query(f"{self.indicator_prompt}\n{content}")
        idx = self._parse_index(reply, content)
        indicator_id = df.loc[int(idx), 'id']
        print(f"✅ Choose indicator index: {idx} " + indicator_id)
        return indicator_id

    def query(self, sentence: str):
        """Resolve *sentence* to an indicator and return its data as a DataFrame."""
        print("🧠 Analyzing best choice of resource id and indicator id...")
        indicators = self.query_indicators(sentence=sentence)
        return pd.DataFrame(self.get_data(indicator=indicators))

Metadata

Metadata

Assignees

Labels

No labels
No labels

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions