3
^^:                 @   s   d Z ddlZddlmZ ddlmZ ddlmZ ddlZddlm	Z	m
Z
 ddlmZ ddlmZ dd	lmZ G d
d dZdd ZG dd dZddiZdd ZG dd de	eZdd Zd ddZd!ddZedkre  dS )"zGa similarities / code duplication command line tool and pylint checker
    N)defaultdict)getopt)groupby)BaseCheckertable_lines_from_stats)IRawChecker)Table)decoding_streamc               @   sL   e Zd ZdZdddZdddZd	d
 Zdd Zdd Zdd Z	dd Z
dS )Similarz,finds copy-pasted lines of code in a project   Fc             C   s"   || _ || _|| _|| _g | _d S )N)	min_linesignore_commentsignore_docstringsignore_importslinesets)selfr   r   r   r    r   9/tmp/pip-build-8app2_gc/pylint/pylint/checkers/similar.py__init__$   s
    zSimilar.__init__Nc             C   sZ   |dkr|j }nt||j }y$| jjt|| | j| j| j W n tk
rT   Y nX dS )z(append a file to search for similaritiesN)		readlinesr	   r   appendLineSetr   r   r   UnicodeDecodeError)r   Zstreamidstreamencodingr   r   r   r   append_stream1   s    zSimilar.append_streamc             C   s   | j | j  dS )z<start looking for similarities and display results on stdoutN)_display_sims_compute_sims)r   r   r   r   runD   s    zSimilar.runc             C   s   t t}x|| j D ]p\}}}}}|| }xX|D ]:}||f|ksJ||f|kr.|j||f |j||f P q.W |j||f||fh qW g }	x0|j D ]$\}}
x|
D ]}|	j||f qW qW |	j  |	j  |	S )z&compute similarities in appended files)r   list
_iter_simsaddr   itemssortreverse)r   Zno_duplicatesnumlineset1Zidx1lineset2Zidx2Z	duplicatecouplessimsZ	ensemblesr   r   r   r   H   s     

zSimilar._compute_simsc       	      C   s   d}x|D ]\}}t   t |dt|d t|}d }}x"|D ]\}}t d|j|f  q@W |rx(|j|||  D ]}t d|j  qvW ||t|d  7 }q
W tdd	 | jD }t d
|||d | f  dS )z'display computed similarities on stdoutr   zsimilar lines infilesNz==%s:%sz     c             S   s   g | ]}t |qS r   )len).0linesetr   r   r   
<listcomp>j   s    z)Similar._display_sims.<locals>.<listcomp>z)TOTAL lines=%s duplicates=%s percent=%.2fg      Y@)printr,   sortedname_real_linesrstripsumr   )	r   r)   Znb_lignes_dupliqueesr%   r(   r.   idxlineZnb_total_lignesr   r   r   r   \   s$    zSimilar._display_simsc             c   s   |j }|j }|j}d}| j}x|t|k rd}d}	x||| D ]}
d}xtt||||
D ]L\}	\\}}\}}||kr||kr|	||||
fV  t||	}P |r`|d7 }q`W |	d7 }	||kr|	||||
fV  t||	}q@W ||7 }qW dS )z+find similarities in the two given linesetsr   r+   N)enumerate_strippedfindr   r,   	enumeratezipmax)r   r&   r'   Zlines1Zlines2r9   Zindex1r   skipr%   Zindex2Z	non_blank_Zline1Zline2r   r   r   _find_commont   s0    (
zSimilar._find_commonc             c   s\   xVt | jdd D ]@\}}x6| j|d d D ] }x| j||D ]
}|V  qBW q0W qW dS )zWiterate on similarities among all files, by making a cartesian
        product
        Nr+   )r:   r   r?   )r   r6   r.   r'   simr   r   r   r       s    zSimilar._iter_sims)r   FFF)N)__name__
__module____qualname____doc__r   r   r   r   r   r?   r    r   r   r   r   r
   !   s      

r
   c                s   |rBt jdj| }dd |jD }dd t|dd dD }d	}g }d
}	xt| ddD ]\}
  j  |r|	 rt fdddD r d
d }	 dd
  |	r j|	rd
}	d |r|j	|
|}|rd |r j
ddd j  |j  qXW |S )z\return lines with leading/trailing whitespace and any ignored code
    features removed
     c             s   s&   | ]}|j t|tjtjffV  qd S )N)lineno
isinstanceastroidImport
ImportFrom)r-   noder   r   r   	<genexpr>   s   z!stripped_lines.<locals>.<genexpr>c             S   s$   i | ]\}}t d d |D |qS )c             s   s   | ]\}}|V  qd S )Nr   )r-   r>   Z	is_importr   r   r   rM      s    z,stripped_lines.<locals>.<dictcomp>.<genexpr>)all)r-   rG   Znode_is_import_groupr   r   r   
<dictcomp>   s   z"stripped_lines.<locals>.<dictcomp>c             S   s   | d S )Nr   r   )xr   r   r   <lambda>   s    z stripped_lines.<locals>.<lambda>)keyFNr+   )startc             3   s   | ]} j |V  qd S )N)
startswith)r-   i)r7   r   r   rM      s    """'''r"""r'''   #r   )rV   rW   rX   rY   )rI   parsejoinbodyr   r:   stripanyendswithgetsplitr   )linesr   r   r   treeZnode_is_import_by_linenoZline_begins_importZcurrent_line_is_importZstrippedlinesZ	docstringrG   r   )r7   r   stripped_lines   s<    

rf   c               @   s\   e Zd ZdZdddZdd Zdd Zd	d
 Zdd Zdd Z	dddZ
dd Zdd ZdS )r   z7Holds and indexes all the lines of a single source fileFc             C   s*   || _ || _t||||| _| j | _d S )N)r2   r3   rf   _stripped_lines	_mk_index_index)r   r2   rd   r   r   r   r   r   r   r      s
    zLineSet.__init__c             C   s
   d| j  S )Nz<Lineset for %s>)r2   )r   r   r   r   __str__   s    zLineSet.__str__c             C   s
   t | jS )N)r,   r3   )r   r   r   r   __len__   s    zLineSet.__len__c             C   s
   | j | S )N)rg   )r   indexr   r   r   __getitem__   s    zLineSet.__getitem__c             C   s   | j |j k S )N)r2   )r   otherr   r   r   __lt__   s    zLineSet.__lt__c             C   s   t | S )N)id)r   r   r   r   __hash__   s    zLineSet.__hash__r   c             c   sB   |}|r| j |d }n| j }x|D ]}||fV  |d7 }q$W dS )zgreturn an iterator on stripped lines, starting from a given index
        if specified, else 0
        Nr+   )rg   )r   Zstart_atr6   rd   r7   r   r   r   r8      s    

zLineSet.enumerate_strippedc             C   s   | j j|f S )z7return positions of the given stripped line in this set)ri   rb   )r   Zstripped_liner   r   r   r9      s    zLineSet.findc             C   s6   t t}x(t| jD ]\}}|r|| j| qW |S )zcreate the index for this set)r   r   r:   rg   r   )r   rl   Zline_nor7   r   r   r   rh      s
    zLineSet._mk_indexN)FFF)r   )rB   rC   rD   rE   r   rj   rk   rm   ro   rq   r8   r9   rh   r   r   r   r   r      s     
	
r   R0801Similar lines in %s files
%sduplicate-codeIndicates that a set of similar lines has been detected among multiple file. This usually means that the code should be refactored to avoid this duplication.c             C   s6   ddddg}|t ||d7 }| jt|dddd	 d
S )z/make a layout with some stats about duplicationrF   nowprevious
differencenb_duplicated_linespercent_duplicated_linesr   r+   )childrencolsZrheadersZcheadersN)ry   rz   )r   r   r   )sectstatsZ	old_statsrd   r   r   r   report_similarities  s    r   c            	   @   s   e Zd ZdZefZdZeZddddddfd	d
ddddfdd
ddddfddddddffZ	dde
ffZd ddZd!ddZdd Zdd Zdd ZdS )"SimilarCheckerzchecks for similarities and duplicated code. This computation may be
    memory / CPU intensive, so you should disable it if you experiment some
    problems.
    Zsimilaritieszmin-similarity-linesr   intz<int>z%Minimum lines number of a similarity.)defaulttypemetavarhelpzignore-commentsTZynz<y or n>z,Ignore comments when computing similarities.zignore-docstringsz.Ignore docstrings when computing similarities.zignore-importsFz+Ignore imports when computing similarities.ZRP0801ZDuplicationNc             C   s(   t j| | tj| dddd d | _d S )Nr   T)r   r   r   )r   r   r
   r~   )r   linterr   r   r   r   O  s    zSimilarChecker.__init__c             C   sd   t j| |||| |dkr&| jj| _n:|dkr:| jj| _n&|dkrN| jj| _n|dkr`| jj| _dS )zmethod called to set an option (registered in the options list)

        overridden to report options setting to Similar
        zmin-similarity-lineszignore-commentszignore-docstringszignore-importsN)r   
set_optionconfigZmin_similarity_linesr   r   r   r   )r   optnamevalueactionoptdictr   r   r   r   V  s    zSimilarChecker.set_optionc             C   s   g | _ | jjddd| _dS )z<init the checkers: reset linesets and statistics informationr   )ry   rz   N)r   r   Z	add_statsr~   )r   r   r   r   opene  s    zSimilarChecker.openc             C   s,   |j  }| j| jj||j W dQ R X dS )zprocess a module

        the module's content is accessible via the stream object

        stream must implement the readlines method
        N)r   r   r   Zcurrent_namefile_encoding)r   rL   r   r   r   r   process_modulel  s    
zSimilarChecker.process_modulec       
      C   s   t dd | jD }d}| j}x| j D ]\}}g }d }}x$|D ]\}}|jd|j|f  qBW |j  |rx(|j|||  D ]}	|j|	j  qW | j	dt
|dj|fd ||t
|d	  7 }q(W ||d
< |o|d | |d< dS )zAcompute and display similarities on closing (i.e. end of parsing)c             s   s   | ]}t |V  qd S )N)r,   )r-   r.   r   r   r   rM   x  s    z'SimilarChecker.close.<locals>.<genexpr>r   Nz==%s:%srr   
)argsr+   ry   g      Y@rz   )r5   r   r~   r   r   r2   r#   r3   r4   Zadd_messager,   r]   )
r   totalZ
duplicatedr~   r%   r(   msgr.   r6   r7   r   r   r   closev  s     zSimilarChecker.close)N)NN)rB   rC   rD   rE   r   Z__implements__r2   MSGSZmsgsoptionsr   Zreportsr   r   r   r   r   r   r   r   r   r     s<   


r   c             C   s   | j t|  dS )z.required method to auto register this checker N)Zregister_checkerr   )r   r   r   r   register  s    r   c             C   s$   t d t   t d tj|  dS )z&display command line usage informationz*finds copy pasted blocks in a set of filesz~Usage: symilar [-d|--duplicates min_duplicated_lines] [-i|--ignore-comments] [--ignore-docstrings] [--ignore-imports] file1...N)r0   sysexit)statusr   r   r   usage  s
    r   c             C   s   | dkrt jdd } d}d}d	}d
}d
}d
}t| ||\}}xZ|D ]R\}	}
|	dkr^t|
}qD|	dkrnt  qD|	dkr|d}qD|	dkrd}qD|	dkrDd}qDW |std t||||}x,|D ]$}t|}|j|| W dQ R X qW |j  t j	d dS )z$standalone command line access pointNr+   Zhdir   duplicates=ignore-commentsignore-importsignore-docstringsr   F-d--duplicates-h--help-i--ignore-commentsT--ignore-docstrings--ignore-importsr   )r   r   r   r   r   )r   r   )r   r   )r   r   )r   )r   )
r   argvr   r   r   r
   r   r   r   r   )r   Zs_optsZl_optsr   r   r   r   optsr   optvalrA   filenamer   r   r   r   Run  s@        


r   __main__)rs   rt   ru   )r   )N)rE   r   collectionsr   r   	itertoolsr   rI   Zpylint.checkersr   r   Zpylint.interfacesr   Zpylint.reporters.ureports.nodesr   Zpylint.utilsr	   r
   rf   r   r   r   r   r   r   r   rB   r   r   r   r   <module>   s.   {,=  
s

'