o
    Ch)r                  	   @   s  d Z ddlZddlZddlZddlmZ ddlmZ ddl	m
Z
mZ ddlm  mZ ddlmZ ddlZddlmZ ddlmZ ddlmZmZ ejd	d
 Zejdd Zejdd Zdd Z dd Z!dd Z"dd Z#dd Z$dd Z%dd Z&dd Z'dd  Z(d!d" Z)d#d$ Z*d%d& Z+d'd( Z,ej-j.d)d*d+d, Z/ej-j.d)d*d-d. Z0d/d0 Z1d1d2 Z2d3d4 Z3d5d6 Z4e5d7d8d9 Z6d:d; Z7d<d= Z8ej-9d>ej:e;d?e<d?dej=gd@dA Z>ej-9dBdCdDgdEdF Z?dGdH Z@dIdJ ZAdKdL ZBej-9dMdCdDgdNdO ZCej-9dMdCdDgdPdQ ZDej-9dRdSdTgej-9dMdCdDgdUdV ZEej-9dRdSdTgej-9dWejFejGgdXdY ZHdZd[ ZId\d] ZJej-Kd^d_d` ZLej-Kd^dadb ZMej-Kd^dcdd ZNdedf ZOdgdh ZPej-9dig djeGg dkfg dleGg dmfgdndo ZQdpdq ZRdrds ZSej-9dtejTejUej;gdudv ZVdwdx ZWdydz ZXd{d| ZYd}d~ ZZdd Z[dd Z\dd Z]dd Z^dS )z
This module tests the functionality of StringArray and ArrowStringArray.
Tests for the str accessors are in pandas/tests/strings/test_string_array.py
    N)using_string_dtype)HAS_PYARROW)pa_version_under12p0pa_version_under19p0)is_dtype_equal)StringArrayNumpySemantics)ArrowStringArrayArrowStringArrayNumpySemanticsc                 C      | \}}t j||dS )zKFixture giving StringDtype from parametrized storage and na_value argumentsstoragena_valuepdStringDtype)string_dtype_argumentsr   r    r   y/var/www/html/myvaluetrips/my_value_trip_new/venv/lib/python3.10/site-packages/pandas/tests/arrays/string_/test_string.pydtype   s   r   c                 C   r
   )Nr   r   )string_dtype_arguments2r   r   r   r   r   dtype2%   s   r   c                 C   s   |   S )z3Fixture giving array type from parametrized 'dtype')construct_array_typer   r   r   r   cls+   s   r   c                 C   s   t r!tjdtjdtjdtjdtjdtjdtjdtjdg}ntjdtjdtjdtjdg}|| }||}|t|| S )Npythonr   pyarrow)r   r   r   npnanNAindexmax)dtype1r   DTYPE_HIERARCHYh1h2r   r   r   string_dtype_highest_priority1   s   

r&   c                  C   sZ   t d tt td} W d    n1 sw   Y  | tjdtjdks+J d S )Nr   pyarrow_numpyr   )	pytestimportorskiptmassert_produces_warningFutureWarningr   r   r   r   r   r   r   r   test_dtype_constructorD   s
   
r-   c                  C   s   t d td} td}tjdtjd}| tjdtjdks#J | |ks)J | |ks/J |tjdtjdks;J || ksAJ ||ksGJ |tjdtjdksSJ |tjdtddks`J || ksfJ ||kslJ d S )Nr   r   r   r   )r(   r)   r   r   r   r   r   float)r"   r   dtype3r   r   r   test_dtype_equalityL   s   


r0   c                 C   s  t dt jdt jdg| di}| jtju rd}nd}t||ks#J | jtju r,d}nd}t|j|ks7J | j	d	krK| jt ju rKd
}d| d}n0| j	d	kr_| jtju r_d}d| d}n| j	dkrs| jtju rsd}d| d}nd}d| d}t|jj|ksJ d S )NAabr   z     A
0    a
1  NaN
2    bz      A
0     a
1  <NA>
2     bz.0      a
1    NaN
2      b
Name: A, dtype: strz40       a
1    <NA>
2       b
Name: A, dtype: stringr   r   <z+>
['a', <NA>, 'b']
Length: 3, dtype: stringr	   z'>
['a', nan, 'b']
Length: 3, dtype: strr   r   StringArray)
r   	DataFramearrayr   r   r   r   reprr1   r   )r   dfexpectedarr_namer   r   r   	test_repra   s*    r<   c                 C   s:   | j g d|d}|d d usJ |d |jju sJ d S )N)r2   Nr3   r      )_from_sequencer   r   )r   r   r2   r   r   r   test_none_to_nan~   s   r?   c                 C   s   | j ddg|d}d}tjt|d d|d< W d    n1 s!w   Y  d}tjt|d td	d
g|d d < W d    d S 1 sFw   Y  d S )Nr2   r3   r   z!Invalid value '10' for dtype 'strmatch
   r   zInvalid value for dtype 'strr=      )r>   r(   raises	TypeErrorr   r7   )r   r   arrmsgr   r   r   test_setitem_validates   s   
"rH   c                 C   s<   t jddg| d}d|d< t jddg| d}t|| d S )Nr2   cr   dr   r   r7   r*   assert_extension_array_equal)r   rF   r:   r   r   r   test_setitem_with_scalar_string   s   rM   c                 C   sf   t jg d| d}tdd g}| }||ddg< t jdt jdg| d}t|| t|| d S )Nr2   r3   rI   r   r1   r   r=   rI   )r   r7   r   copyr   r*   rL   assert_numpy_array_equal)r   rF   value
value_origr:   r   r   r   $test_setitem_with_array_with_missing   s   rS   c                 C   s   t t jddd}d |d< || }t|j| sJ |d}t|| ||jd  }|| }t|j| s:J ||j}t|| d S )N2000   )periodsr   zdatetime64[ns])	r   Series
date_rangeastyper   r   r*   assert_series_equaliloc)r   sercastedresultser2casted2result2r   r   r   test_astype_roundtrip   s   


rc   c                 C   s   t jg d| d}t jg d| d}|| }t jg d| d}t|| ||}t|| ||}t jg d| d}t|| |j|dd}t jg d| d}t|| d S )	N)r2   r3   rI   NNr   )xyNzN)axbyNNN)xaybNNN-)
fill_value)rg   rh   zc-z-zN)r   rX   r*   r[   addradd)r   r2   r3   r_   r:   r   r   r   test_add   s   

ro   c                 C   s   | j dkrd}tjjd |d}|| tjg d| d}tjg dgtd}tj	t
dd ||  W d    n1 s=w   Y  t|}tj	t
dd ||  W d    d S 1 s^w   Y  d S )Nr   z*Failed: DID NOT RAISE <class 'ValueError'>rD   reasonrN   r   z3 != 1r@   )r   r(   markxfailapplymarkerr   r7   r   objectrD   
ValueErrorrX   )r   requestrq   rr   r2   r3   sr   r   r   test_add_2d   s   




"ry   c                 C   sj   t jg d| d}g d}|| }t jg d| d}t|| || }t jg d| d}t|| d S )N)r2   r3   NNr   )rd   Nre   N)rg   NNN)ri   NNNrK   )r   r2   otherr_   r:   r   r   r   test_add_sequence   s   r{   c                 C   sP   t jg d| d}|d }t jg d| d}t|| d| }t|| d S )Nr2   r3   Nr   rC   )aabbNrK   )r   r2   r_   r:   r   r   r   test_mul   s   r   zGH-28527)rq   c                 C   s   t jg d| d}t jg dgtd}||tu sJ || }t g dg| }t|| || }t g dg| }t|| d S )N)r2   r3   rI   rJ   r   )tre   vw)atrh   cvdw)tarj   vcwd)	r   r7   r6   ru   __add__NotImplementedrZ   r*   assert_frame_equalr   rF   r9   r_   r:   r   r   r   test_add_strings   s   r   c                 C   s   t jddtjtjg| d}t dtjdtjgg}||tu s"J || }t dtjtjtjgg| }t	|| || }t dtjtjtjgg| }t	|| d S )Nr2   r3   r   rd   re   rg   ri   )
r   r7   r   r   r6   r   r   rZ   r*   r   r   r   r   r   test_add_frame  s     r   c                    s   d| j  d tjg d|d}dt| }|jtju rFt fdd|D }| tjkr6d|d< nd	|d< t	
||tj d S |jd
krMdnd}tj fdd|D td}tj||d}t	|| d S )N__r2   NrI   r   r2   c                       g | ]	}t | qS r   getattr.0itemop_namerz   r   r   
<listcomp>      z2test_comparison_methods_scalar.<locals>.<listcomp>Tr=   Fr   boolean[pyarrow]booleanc                    r   r   r   r   r   r   r   r   !  r   )__name__r   r7   r   r   r   r   operatorner*   rP   rZ   bool_r   ru   rL   )comparison_opr   r2   r_   r:   expected_dtyper   r   r   test_comparison_methods_scalar  s   

r   c                 C   s   d| j  d}tjg d|d}t||tj}|jtju r:tj	| kr+tg d}ntg d}t
|| d S |jdkrAdnd}tjg d	|d}t
|| t
|| d S )
Nr   r   r   TTTFFFr   r   r   )NNN)r   r   r7   r   r   r   r   r   r   r   r*   rP   r   rL   )r   r   r   r2   r_   r:   r   r   r   r   $test_comparison_methods_scalar_pd_na&  s   
r   c           	      C   s   d| j  d}tjg d|d}d}|dvr7tjtdd t||| W d    d S 1 s0w   Y  d S t|||}|jtj	u r\g dg d	d| }t|}t
|| d S g d
g dd| }|jdkrndnd}tj||d}t
|| d S )Nr   r   r   *   )__eq____ne__z(Invalid comparison|not supported betweenr@   r   r   )FNF)TNTr   r   r   )r   r   r7   r(   rD   rE   r   r   r   r   r*   rP   r   rL   )	r   r   r   r2   rz   r_   expected_datar:   r   r   r   r   )test_comparison_methods_scalar_not_string8  s2   

r   c                 C   s&  d| j  d}tjg d|d}tjg d|d}| ||}| ||}t|| |jtju r^|jtju r^tj	| krBtg d}ntg d}t
|d ||d |d< t|| d S t||}	|	jdkrkd	}
nd
}
tjt|d dd}t
|d ||d |d< tj||
d}t|| d S )Nr   r   r   NNrI   TTFr   rW   r   r   bool[pyarrow]ru   rl   r   )r   r   r7   r*   assert_equalr   r   r   r   r   r   rP   r&   r   fulllenrL   )r   r   r   r   r2   rz   r_   rb   r:   	max_dtyper   r   r   r   test_comparison_methods_arrayV  s(   




r   r   c           
      C   s   dd l }d| j d}t| }tjg d|d}tjg d|d}| ||}| ||}t|| tjg ddd}	t|d ||d |	d< t	||	 d S )	Nr   r   r   r   r   )NNTr   rW   )
r   r   r   
ArrowDtypestringr7   r*   r   r   rL   )
r   r   par   r   r2   rz   r_   rb   r:   r   r   r   -test_comparison_methods_array_arrow_extensionv  s   

r   c           	      C   s  d| j  d}tjg d|d}g d}| ||}| ||}t|| |jtju rStj	| kr7tg d}ntg d}t
|d ||d |d< t|| d S |jdkrZd	nd
}tjt|d dd}t
|d ||d |d< tj||d}t|| d S )Nr   r   r   r   r   r   rW   r   r   r   ru   r   )r   r   r7   r*   r   r   r   r   r   r   r   rP   r   r   r   rL   )	r   r   r   r2   rz   r_   rb   r:   r   r   r   r   test_comparison_methods_list  s"   


r   c                 C   sX  | t jju r	d}n	| tu rd}nd}tjt|d | tjddgdd W d    n1 s/w   Y  tjt|d | tg  W d    n1 sMw   Y  | t jju s\| tu rt| tjdtj	gt
d | tjdd gt
d nEtjt|d | tjdtj	gt
d W d    n1 sw   Y  tjt|d | tjdd gt
d W d    n1 sw   Y  tjt|d | tjdt jgt
d W d    n1 sw   Y  tjt|d | tjdtd	d
gt
d W d    n1 sw   Y  tjt|d | tjdtd	d
gt
d W d    d S 1 s%w   Y  d S )Nz7StringArray requires a sequence of strings or pandas.NAz?StringArrayNumpySemantics requires a sequence of strings or NaNzBUnsupported type '<class 'numpy.ndarray'>' for ArrowExtensionArrayr@   r2   r3   S1r   NaTns)r   arraysr5   r   r(   rD   rv   r   r7   r   ru   r   
datetime64timedelta64)r   rG   r   r   r   test_constructor_raises  s:     $r   nar   c                 C   s>   t jtdt jg}tt jtjd| gdd| d S )Nr2   ru   r   )r   r   r5   r   r7   r   r*   rL   )r   r:   r   r   r   test_constructor_nan_like  s   r   rO   TFc           	      C   s   t jdt jgtd}| }t jdtjgtd}|j||| d}|tt	fv r7dd l
}||j|| dd}n|tu r@||}n||}t|| t|| d S )Nr2   r   )r   rO   r   Ttypefrom_pandas)r   r7   r   ru   rO   r   r   r>   r   r	   r   r   r   r*   rL   rP   )	rO   r   r   nan_arrexpected_inputna_arrr_   r   r:   r   r   r   test_from_sequence_no_mutate  s   
r   c                 C   s   t jg d| d}|d}tjg ddd}t|| t jdt jdg| d}| jtju r3t	}d}nt
}d}tj||d	 |d W d    d S 1 sOw   Y  d S )
N)123r   int64)r=   rC      r   r   z#cannot convert float NaN to integerzJint\(\) argument must be a string, a bytes-like object or a( real)? numberr@   )r   r7   rZ   r   r*   rP   r   r   r   rv   rE   r(   rD   )r   rF   r_   r:   errrG   r   r   r   test_astype_int  s   
"r   c                 C   sF   t jdt jdg| d}|d}t jdt jdgdd}t|| d S )Nr   r   r   Int64r=   r   )r   r7   r   rZ   r*   rL   r   rF   r_   r:   r   r   r   test_astype_nullable_int  s   
r   c                 C   sF   t jdt jdg| d}||}t jdtjdg|d}t|| d S )Nz1.1z3.3r   g?gffffff
@)r   rX   r   rZ   r   r   r*   r[   )r   any_float_dtyper]   r_   r:   r   r   r   test_astype_float  s   
r   skipnac                 C   s.   t jg d|d}|j| d}|dksJ d S )NrN   r   r   abc)r   rX   sumr   r   rF   r_   r   r   r   test_reduce	  s   r   c                 C   sD   t jg d|d}|j| d}| r|dksJ d S t |s J d S )N)Nr2   Nr3   rI   Nr   r   r   )r   rX   r   isnar   r   r   r   test_reduce_missing  s
   r   methodminr!   c                 C   sZ   t jg d|d}t|| |d}|r#| dkrdnd}||ks!J d S ||jju s+J d S )Nr2   r3   rI   Nr   r   r   r2   rI   )r   rX   r   r   r   )r   r   r   rF   r_   r:   r   r   r   test_min_max  s   r   boxc           	      C   s   |j dkr!|tju r!|tju rd}nd}tjjt|d}|| |g d|d}tt	| |}| dkr6dnd	}||ks>J d S )
Nr   z<'<=' not supported between instances of 'str' and 'NoneType'z0'ArrowStringArray' object has no attribute 'max'rp   r   r   r   r2   rI   )
r   r   r7   r(   rr   rs   rE   rt   r   r   )	r   r   r   rw   rq   rr   rF   r_   r:   r   r   r   test_min_max_numpy&  s   

r   c                 C   s   t jdt jg| d}|jdd}t jddg| d}t|| |jtdd}t jddg| d}t|| d}tj	t
|d |jdd W d    d S 1 sRw   Y  d S )Nr2   r   r3   )rQ    Invalid value '1' for dtype 'strr@   r=   )r   r7   r   fillnar*   rL   r   str_r(   rD   rE   )r   rF   resr:   rG   r   r   r   test_fillna_args7  s   "r   c                 C   s   t d}dd lm} tjg d| d}||}|jt|| dd}| jdkr1t	r1|
|}| jdkr>||| }||sEJ d S )Nr   r   rN   r   Tr   r   )r(   r)   pyarrow.computecomputer   r7   listlarge_stringr   r   chunked_arraycastr   equals)r   r   pcdatarF   r:   r   r   r   test_arrow_arrayI  s   



r   z0ignore:Passing a BlockManager:DeprecationWarningc           	      C   s6  t d}tjg d| d}td|i}||}| jdkr*|djdks)J n
|djdks4J t	d| |
 }W d    n1 sIw   Y  | jtju ra|sa|d jd	ks_J d S t|d jtjslJ |tj|| jd
}|r|jtj|tjd
|_t|| |jd |d jju sJ d S )Nr   r|   r   r2   r   r   r   string_storageru   r   )rC   r2   )r(   r)   r   r7   r6   tabler   fieldr   option_context	to_pandasr   r   r   r   
isinstancer   rZ   columnsr*   r   loc	r   r   using_infer_stringr   r   r9   r   r_   r:   r   r   r   test_arrow_roundtripX  s(   



r   c                 C   sx   t d}|d|jg d| di}| }| r)ts)tjdg didd}ntjdg didd}t	
|| d S )Nr   r2   r|   r   strr   ru   )r(   r)   r   r7   r   r   r   r   r6   r*   r   )r   r   r   r_   r:   r   r   r   test_arrow_from_stringt  s   
 r   c           	      C   s:  t d}tjg | d}td|i}||}| jdkr(|djdks'J n
|djdks2J |j|j	g |
 dg|jd}td	| | }W d    n1 sWw   Y  | jtju rpt sp|d jd
ksnJ d S t|d jtjs{J |tj|| jd}|r|jtj|tjd|_t|| d S )Nr   r   r2   r   r   r   r   )schemar   ru   r   )r(   r)   r   r7   r6   r   r   r   r   r   r   r  r   r   r   r   r   r   r   r   r   rZ   r   r*   r   r   r   r   r    test_arrow_load_from_zero_chunks  s(   


 
r  c                 C   s   | j tju r	d}n
| jdkrd}nd}tjdddtjg| d}|jdd	}tjg d
|g d |dd}t	
|| |jdd	}tjddg|d d |dd}t	
|| d S )Nr   r   int64[pyarrow]r   r2   r3   r   F)dropna)rC   r=   r=   )r   r=   r   countr    r   nameTrC   r=   )r   r   r   r   r   r7   r   value_countsrX   r*   r[   )r   	exp_dtyperF   r_   r:   r   r   r   test_value_counts_na  s   
r
  c                 C   s~   | j tju r
tj}n
| jdkrd}nd}tjdddtjg| d}|jdd}tjd	d
g|d d	 |ddd }t	
|| d S )Nr   zdouble[pyarrow]Float64r2   r3   r   T)	normalizerC   r=   
proportionr  r   )r   r   r   float64r   r   rX   r   r  r*   r[   r   r	  r]   r_   r:   r   r   r    test_value_counts_with_normalize  s   
"r  zvalues, expectedrN   r   r|   )FFTc              	   C   s   t j| |d} d}tjt|dU t dd6 |  }t|| t |  }t |}t	|| t 
|  }t 
|}t|| W d    n1 sPw   Y  W d    d S W d    d S 1 shw   Y  d S )Nr   z"use_inf_as_na option is deprecatedr@   zmode.use_inf_as_naT)r   r7   r*   r+   r,   r   r   rP   rX   r[   r6   r   )valuesr:   r   rG   r_   r   r   r   test_use_inf_as_na  s    	

"r  c                 C   sr   | j tju r	d}n
| jdkrd}nd}tjg d| d}|jdd}tjg d	|d d
 |dd}t|| d S )Nr   r   r  r   )r2   r3   rI   r3   r   F)sort)r=   rC   r=   r   r  r  )	r   r   r   r   r   rX   r  r*   r[   r  r   r   r   test_value_counts_sort_false  s   
r  c                 C   sf   | j dkrtd| j   tjg d| d}d|j  k r.|   kr.|jddk s1J  J d S )Nr   znot applicable for rN   r   r   T)deep)r   r(   skipr   rX   nbytesmemory_usage)r   seriesr   r   r   test_memory_usage  s   
8r  float_dtypec                 C   s:   t jdg| d}||}t jdg|d}t|| d S )Ng?r   z0.1)r   rX   rZ   r*   r[   )r  r   r]   r_   r:   r   r   r   test_astype_from_float_dtype  s   
r  c                 C   sF   t jdt jdg| d}t|}tjd| jdgtd}t|| d S )Nr2   r3   r   )r   r7   r   r   r   ru   r*   rP   r   r   r   r   "test_to_numpy_returns_pdna_default  s   
r  c                 C   sJ   |}t jdt jdg| d}|j|d}tjd|dgtd}t|| d S )Nr2   r3   r   r   )r   r7   r   to_numpyr   ru   r*   rP   )r   nulls_fixturer   rF   r_   r:   r   r   r   test_to_numpy_na_value  s
   r   c                 C   s   t jg d| d}|ddg}t g d}t|| |dt jg}t g d}t|| |g }t g d}t|| |d|g}t g d}t|| ||g}t g d}t|| d S )Nr|   r   r2   rI   TFFTFTr   )r   rX   isinr*   r[   r   )r   fixed_now_tsrx   r_   r:   r   r   r   	test_isin  s    
r%  c                 C   sz   t jg d| d}|t jddg|d}t g d}t|| |t jdd g|d}t g d}t|| d S )Nr|   r   r2   rI   r!  r"  )r   rX   r#  r7   r*   r[   )r   r   rx   r_   r:   r   r   r   test_isin_string_array  s   r&  c                 C   s   t d}tjg d| d}|tjddgt| d}tg d}t	|| |tjdd gt| d}tg d}t	|| d S )Nr   r|   r   r2   rI   r!  r"  )
r(   r)   r   rX   r#  r7   r   r   r*   r[   )r   r   rx   r_   r:   r   r   r   test_isin_arrow_string_array+  s   
""r'  c                 C   s   t jg d| d}tg d}d ||< |jd |jju sJ t jg d| d}d}tjt|d d||< W d    d S 1 sAw   Y  d S )NrN   r   )FTFr=   r   r@   )	r   rX   r   r7   r   r   r(   rD   rE   )r   r]   maskrG   r   r   r   (test_setitem_scalar_with_mask_validation8  s   
"r)  c                 C   sD   g d}t j|t jd}tj|| d}tj|| d}t|| d S NrN   r   )r   r7   r   r   r*   rL   r   valsrF   r_   r:   r   r   r   test_from_numpy_strI  s
   r-  c                 C   s2   g d}t j|| d}| }|}t|| d S r*  )r   r7   tolistr*   r   r+  r   r   r   test_tolistQ  s
   r/  )___doc__r   numpyr   r(   pandas._configr   pandas.compatr   pandas.compat.pyarrowr   r   pandas.util._test_decoratorsutil_test_decoratorstdpandas.core.dtypes.commonr   pandasr   pandas._testing_testingr*   pandas.core.arrays.string_r   pandas.core.arrays.string_arrowr   r	   fixturer   r   r   r&   r-   r0   r<   r?   rH   rM   rS   rc   ro   ry   r{   r   rr   rs   r   r   r   r   r   r   
skip_if_nor   r   r   parametrizer   r  r.   r   r   r   r   r   r   r   r   r   rX   r7   r   r   r   filterwarningsr   r   r  r
  r  r  r  r  float16float32r  r  r   r%  r&  r'  r)  r-  r/  r   r   r   r   <module>   s    


	


 
$#



	








